# 1. Data Exploration

## 1.1. Importing Libraries & Reading Data

In [1]:
import pandas as pd
import itertools as it
import scipy.stats as sp

from scipy.stats import chi2_contingency
from scipy.stats import chi2

import numpy as np
import matplotlib.pyplot as plt

In [5]:
df = pd.read_csv('data/raw_training_data.csv')

In [3]:
columns = ["genre", "company", "country", "director", "rating", "released", "isprofit"]

Our data has the following columns that we can use to conduct the chi-square test on:
* Genre
* Company
* Country
* Director
* Rating
* Released
* Isprofit    
<p>
> We want to find out whether two categorical attributes are independent or dependent by using the chi-square test. We do this by getting all combinations of length two from the list above and conducting the chi-square test on each of the categorical pairs.
<br><br>
> After writing some general formulas, we will conduct the test on all 21 combinations.
</p>

## 1.2 Chi-square Test

In [7]:

# Get all combinations of columns array of length 2, C(7, 2) = 21 combinations
comb = it.combinations(columns, 2) 

for i in list(comb): 
    # Contingency Table
    table = pd.crosstab(df[i[0]], df[i[1]])
    display(table)
    
    # Observed Values
    Observed_Values = table.values
    print("Observed Values")
    display(Observed_Values)
    
    # Calculate Chi-Squared test
    # Returns of scipy.state.chi2_contigency()
    # chi2: The test statistic
    # p: The p-value of the test
    # dof: Degrees of freedom
    # expected: The expected frequencies, based on the marginal sums of the table
    
    chi2_test_statistic, p, dof, expected = sp.chi2_contingency(table)
    print("chi2_test_statistic, p, dof, expected")
    display(chi2_test_statistic, p, dof, expected)
    
    # interpret test statistic
    
    # Test Statistic >= Critical Value: reject null hypotheses, dependent (Ha)
    # Test Statistic < Critical Value: fail to reject null hypotheses, independent (Ho)
    # chi2.ppf(q, df, loc=0, scale=1) inverset CDF
    
    prob = 0.95 # significant value = 1 - 0.95 = 0.05
    critical = chi2.ppf(prob, dof)
    print("critical = %.3f, chi2_test_statistic = %.3f" % (critical, chi2_test_statistic))
    
    if chi2_test_statistic >= critical:
        print("Dependent (reject H0)\n")
    else:
        print("Independent (fail to reject H0)\n")
        
    # interpret p-value
    # p-value <= alpha: reject null hypothesis, dependent (Ha)
    # p-value: > alpha: fail to reject null hypothesis, independent (Ho)
    alpha = 1.0 - prob
    print("significance = %.3f, p = %.3f" % (alpha, p))
    if p <= alpha:
        print("Dependent (reject H0)")
    else: 
        print("Independent (fail to reject H0)")

company,Castle Rock Entertainment,Columbia Pictures,Columbia Pictures Corporation,Dimension Films,DreamWorks,Fox 2000 Pictures,Fox Searchlight Pictures,Hollywood Pictures,Metro-Goldwyn-Mayer (MGM),Miramax,...,Other,Paramount Pictures,Screen Gems,Summit Entertainment,Touchstone Pictures,TriStar Pictures,Twentieth Century Fox Film Corporation,Universal Pictures,Walt Disney Pictures,Warner Bros.
genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0,0,0,0,0,0,0,0,0,0,...,9,0,0,0,0,0,0,0,0,0
Action,6,28,36,6,10,3,1,7,11,3,...,348,57,8,11,16,8,50,58,16,79
Adventure,2,5,7,1,4,7,3,2,3,2,...,98,10,1,6,5,0,15,11,16,15
Animation,1,10,1,0,3,4,0,0,0,0,...,98,9,0,0,3,0,7,5,14,12
Biography,0,6,8,0,3,3,6,3,1,1,...,127,1,0,0,6,2,1,17,3,4
Comedy,18,19,30,12,21,18,16,12,26,21,...,556,54,8,5,28,21,37,76,19,42
Crime,4,6,7,2,2,1,1,2,4,8,...,156,15,4,1,2,5,3,11,0,17
Drama,6,9,22,1,15,10,7,4,13,10,...,358,20,7,6,16,3,12,20,1,36
Family,0,0,0,0,0,0,0,0,0,0,...,2,0,0,0,0,0,0,0,0,0
Fantasy,0,0,0,0,0,0,0,0,0,0,...,8,4,0,0,0,2,0,1,0,0


Observed Values


array([[  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   9,   0,
          0,   0,   0,   0,   0,   0,   0,   0],
       [  6,  28,  36,   6,  10,   3,   1,   7,  11,   3,  32, 348,  57,
          8,  11,  16,   8,  50,  58,  16,  79],
       [  2,   5,   7,   1,   4,   7,   3,   2,   3,   2,  10,  98,  10,
          1,   6,   5,   0,  15,  11,  16,  15],
       [  1,  10,   1,   0,   3,   4,   0,   0,   0,   0,   0,  98,   9,
          0,   0,   3,   0,   7,   5,  14,  12],
       [  0,   6,   8,   0,   3,   3,   6,   3,   1,   1,   1, 127,   1,
          0,   0,   6,   2,   1,  17,   3,   4],
       [ 18,  19,  30,  12,  21,  18,  16,  12,  26,  21,  35, 556,  54,
          8,   5,  28,  21,  37,  76,  19,  42],
       [  4,   6,   7,   2,   2,   1,   1,   2,   4,   8,  11, 156,  15,
          4,   1,   2,   5,   3,  11,   0,  17],
       [  6,   9,  22,   1,  15,  10,   7,   4,  13,  10,  13, 358,  20,
          7,   6,  16,   3,  12,  20,   1,  36],
       [  0,   0,   0,  

chi2_test_statistic, p, dof, expected


632.6359770327847

6.867936576941464e-26

300

array([[9.44948922e-02, 2.24744608e-01, 2.98808173e-01, 8.68331442e-02,
        1.55788876e-01, 1.17480136e-01, 8.93870602e-02, 7.91713961e-02,
        1.53234960e-01, 1.14926220e-01, 3.03916005e-01, 4.79370034e+00,
        4.52043133e-01, 8.17253121e-02, 7.91713961e-02, 1.94097616e-01,
        1.04710556e-01, 3.26901249e-01, 5.31214529e-01, 1.76220204e-01,
        5.41430193e-01],
       [8.33654938e+00, 1.98274688e+01, 2.63615210e+01, 7.66061294e+00,
        1.37440409e+01, 1.03643587e+01, 7.88592509e+00, 6.98467650e+00,
        1.35187287e+01, 1.01390465e+01, 2.68121453e+01, 4.22910897e+02,
        3.98802497e+01, 7.20998865e+00, 6.98467650e+00, 1.71237230e+01,
        9.23779796e+00, 2.88399546e+01, 4.68649262e+01, 1.55465380e+01,
        4.77661748e+01],
       [2.34137344e+00, 5.56867196e+00, 7.40380250e+00, 2.15153235e+00,
        3.86010216e+00, 2.91089671e+00, 2.21481271e+00, 1.96169126e+00,
        3.79682179e+00, 2.84761635e+00, 7.53036322e+00, 1.18777242e+02,
        1.1200

critical = 341.395, chi2_test_statistic = 632.636
Dependent (reject H0)

significance = 0.050, p = 0.000
Dependent (reject H0)


country,Other,USA
genre,Unnamed: 1_level_1,Unnamed: 2_level_1
0,9,0
Action,100,694
Adventure,38,185
Animation,18,149
Biography,60,133
Comedy,101,973
Crime,29,233
Drama,117,472
Family,0,3
Fantasy,1,18


Observed Values


array([[  9,   0],
       [100, 694],
       [ 38, 185],
       [ 18, 149],
       [ 60, 133],
       [101, 973],
       [ 29, 233],
       [117, 472],
       [  0,   3],
       [  1,  18],
       [ 16, 132],
       [  3,  21],
       [  2,   2],
       [  1,   7],
       [  1,   5],
       [  1,   0]], dtype=int64)

chi2_test_statistic, p, dof, expected


156.57229777650653

1.1876526875042708e-25

15

array([[1.26929625e+00, 7.73070375e+00],
       [1.11980136e+02, 6.82019864e+02],
       [3.14503405e+01, 1.91549659e+02],
       [2.35524972e+01, 1.43447503e+02],
       [2.72193530e+01, 1.65780647e+02],
       [1.51469353e+02, 9.22530647e+02],
       [3.69506243e+01, 2.25049376e+02],
       [8.30683882e+01, 5.05931612e+02],
       [4.23098751e-01, 2.57690125e+00],
       [2.67962543e+00, 1.63203746e+01],
       [2.08728717e+01, 1.27127128e+02],
       [3.38479001e+00, 2.06152100e+01],
       [5.64131669e-01, 3.43586833e+00],
       [1.12826334e+00, 6.87173666e+00],
       [8.46197503e-01, 5.15380250e+00],
       [1.41032917e-01, 8.58967083e-01]])

critical = 24.996, chi2_test_statistic = 156.572
Dependent (reject H0)

significance = 0.050, p = 0.000
Dependent (reject H0)


director,Barry Levinson,Bruce Beresford,Clint Eastwood,Dennis Dugan,Martin Scorsese,Michael Apted,Oliver Stone,Other,Renny Harlin,Richard Donner,...,Ridley Scott,Rob Reiner,Robert Zemeckis,Ron Howard,Spike Lee,Steven Soderbergh,Steven Spielberg,Tim Burton,Wes Craven,Woody Allen
genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0,0,0,0,0,0,0,9,0,0,...,0,0,0,0,0,0,0,0,0,0
Action,0,0,5,3,0,1,1,749,10,9,...,5,0,1,3,1,0,4,1,0,0
Adventure,1,0,1,0,1,1,0,197,0,0,...,4,3,4,3,0,0,5,3,0,0
Animation,0,0,0,0,0,0,0,162,0,0,...,0,0,2,0,0,0,1,1,0,0
Biography,2,1,6,0,2,2,5,165,0,0,...,1,0,0,3,0,2,2,2,0,0
Comedy,4,3,0,8,0,1,0,994,0,1,...,2,7,2,6,7,3,1,3,3,24
Crime,2,1,3,0,3,4,2,235,0,0,...,2,1,0,0,3,5,1,0,0,0
Drama,3,5,5,0,3,1,6,531,0,1,...,1,2,3,0,4,7,5,1,1,4
Family,0,0,0,0,0,0,0,3,0,0,...,0,0,0,0,0,0,0,0,0,0
Fantasy,0,0,0,0,0,0,0,16,0,0,...,0,0,0,0,0,0,0,1,2,0


Observed Values


array([[  0,   0,   0,   0,   0,   0,   0,   9,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   5,   3,   0,   1,   1, 749,  10,   9,   1,   5,   0,
          1,   3,   1,   0,   4,   1,   0,   0],
       [  1,   0,   1,   0,   1,   1,   0, 197,   0,   0,   0,   4,   3,
          4,   3,   0,   0,   5,   3,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0, 162,   0,   0,   1,   0,   0,
          2,   0,   0,   0,   1,   1,   0,   0],
       [  2,   1,   6,   0,   2,   2,   5, 165,   0,   0,   0,   1,   0,
          0,   3,   0,   2,   2,   2,   0,   0],
       [  4,   3,   0,   8,   0,   1,   0, 994,   0,   1,   5,   2,   7,
          2,   6,   7,   3,   1,   3,   3,  24],
       [  2,   1,   3,   0,   3,   4,   2, 235,   0,   0,   0,   2,   1,
          0,   0,   3,   5,   1,   0,   0,   0],
       [  3,   5,   5,   0,   3,   1,   6, 531,   0,   1,   6,   1,   2,
          3,   0,   4,   7,   5,   1,   1,   4],
       [  0,   0,   0,  

chi2_test_statistic, p, dof, expected


561.0515094746818

4.588799305328769e-18

300

array([[3.06469921e-02, 2.55391600e-02, 5.10783201e-02, 2.80930760e-02,
        2.55391600e-02, 2.55391600e-02, 3.57548241e-02, 8.27724177e+00,
        3.06469921e-02, 2.80930760e-02, 3.32009081e-02, 3.83087401e-02,
        3.32009081e-02, 3.06469921e-02, 4.34165721e-02, 3.83087401e-02,
        4.34165721e-02, 4.85244041e-02, 3.06469921e-02, 3.06469921e-02,
        7.15096481e-02],
       [2.70374574e+00, 2.25312145e+00, 4.50624291e+00, 2.47843360e+00,
        2.25312145e+00, 2.25312145e+00, 3.15437003e+00, 7.30236663e+02,
        2.70374574e+00, 2.47843360e+00, 2.92905789e+00, 3.37968218e+00,
        2.92905789e+00, 2.70374574e+00, 3.83030647e+00, 3.37968218e+00,
        3.83030647e+00, 4.28093076e+00, 2.70374574e+00, 2.70374574e+00,
        6.30874007e+00],
       [7.59364359e-01, 6.32803632e-01, 1.26560726e+00, 6.96083995e-01,
        6.32803632e-01, 6.32803632e-01, 8.85925085e-01, 2.05091657e+02,
        7.59364359e-01, 6.96083995e-01, 8.22644722e-01, 9.49205448e-01,
        8.2264

critical = 341.395, chi2_test_statistic = 561.052
Dependent (reject H0)

significance = 0.050, p = 0.000
Dependent (reject H0)


rating,0,G,NC-17,NOT RATED,Not specified,PG,PG-13,R,UNRATED
genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,9,0,0,0,0,0,0,0,0
Action,0,0,0,1,2,67,362,362,0
Adventure,0,11,0,0,0,90,77,45,0
Animation,0,47,0,0,0,109,6,5,0
Biography,0,1,0,2,0,27,64,97,2
Comedy,0,8,3,7,0,177,405,470,4
Crime,0,0,1,0,0,0,26,233,2
Drama,0,1,1,12,1,40,215,314,5
Family,0,1,0,0,0,2,0,0,0
Fantasy,0,0,0,1,0,1,0,17,0


Observed Values


array([[  9,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   1,   2,  67, 362, 362,   0],
       [  0,  11,   0,   0,   0,  90,  77,  45,   0],
       [  0,  47,   0,   0,   0, 109,   6,   5,   0],
       [  0,   1,   0,   2,   0,  27,  64,  97,   2],
       [  0,   8,   3,   7,   0, 177, 405, 470,   4],
       [  0,   0,   1,   0,   0,   0,  26, 233,   2],
       [  0,   1,   1,  12,   1,  40, 215, 314,   5],
       [  0,   1,   0,   0,   0,   2,   0,   0,   0],
       [  0,   0,   0,   1,   0,   1,   0,  17,   0],
       [  0,   0,   0,   0,   0,   0,  29, 119,   0],
       [  0,   0,   0,   1,   0,   0,  10,  13,   0],
       [  0,   0,   0,   0,   0,   2,   2,   0,   0],
       [  0,   0,   0,   0,   0,   0,   6,   2,   0],
       [  0,   0,   0,   0,   0,   1,   0,   5,   0],
       [  0,   0,   0,   0,   0,   0,   0,   1,   0]], dtype=int64)

chi2_test_statistic, p, dof, expected


5155.835279456919

0.0

120

array([[2.29852440e-02, 1.76220204e-01, 1.27695800e-02, 6.12939841e-02,
        7.66174801e-03, 1.31782066e+00, 3.06980704e+00, 4.29824064e+00,
        3.32009081e-02],
       [2.02780931e+00, 1.55465380e+01, 1.12656073e+00, 5.40749149e+00,
        6.75936436e-01, 1.16261067e+02, 2.70825199e+02, 3.79200341e+02,
        2.92905789e+00],
       [5.69523269e-01, 4.36634506e+00, 3.16401816e-01, 1.51872872e+00,
        1.89841090e-01, 3.26526674e+01, 7.60629966e+01, 1.06500851e+02,
        8.22644722e-01],
       [4.26503973e-01, 3.26986379e+00, 2.36946652e-01, 1.13734393e+00,
        1.42167991e-01, 2.44528944e+01, 5.69619750e+01, 7.97562429e+01,
        6.16061294e-01],
       [4.92905789e-01, 3.77894438e+00, 2.73836549e-01, 1.31441544e+00,
        1.64301930e-01, 2.82599319e+01, 6.58303065e+01, 9.21733825e+01,
        7.11975028e-01],
       [2.74290579e+00, 2.10289444e+01, 1.52383655e+00, 7.31441544e+00,
        9.14301930e-01, 1.57259932e+02, 3.66330306e+02, 5.12923383e+02,
        3.9

critical = 146.567, chi2_test_statistic = 5155.835
Dependent (reject H0)

significance = 0.050, p = 0.000
Dependent (reject H0)


released,0,1.01.2004 00:00,1.02.1991 00:00,1.02.2002 00:00,1.02.2008 00:00,1.02.2013 00:00,1.03.1991 00:00,1.03.1996 00:00,1.03.2002 00:00,1.03.2013 00:00,...,9.11.1994 00:00,9.11.2001 00:00,9.11.2005 00:00,9.11.2007 00:00,9.11.2012 00:00,9.12.1988 00:00,9.12.1994 00:00,9.12.2005 00:00,9.12.2011 00:00,9.12.2016 00:00
genre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Action,0,0,0,0,0,1,0,0,1,0,...,0,0,0,0,1,0,1,0,0,0
Adventure,0,0,0,0,1,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
Animation,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Biography,0,0,0,0,0,0,1,0,0,0,...,0,0,1,0,0,0,0,0,0,0
Comedy,0,1,2,2,1,2,0,1,1,1,...,1,1,0,1,0,1,0,0,1,1
Crime,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,1
Drama,0,0,0,0,0,0,0,1,1,0,...,0,1,0,1,0,0,1,1,1,0
Family,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Fantasy,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Observed Values


array([[9, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 1, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

chi2_test_statistic, p, dof, expected


29127.625975806837

2.0946853130285455e-27

26565

array([[2.29852440e-02, 2.55391600e-03, 5.10783201e-03, ...,
        5.10783201e-03, 5.10783201e-03, 5.10783201e-03],
       [2.02780931e+00, 2.25312145e-01, 4.50624291e-01, ...,
        4.50624291e-01, 4.50624291e-01, 4.50624291e-01],
       [5.69523269e-01, 6.32803632e-02, 1.26560726e-01, ...,
        1.26560726e-01, 1.26560726e-01, 1.26560726e-01],
       ...,
       [2.04313280e-02, 2.27014756e-03, 4.54029512e-03, ...,
        4.54029512e-03, 4.54029512e-03, 4.54029512e-03],
       [1.53234960e-02, 1.70261067e-03, 3.40522134e-03, ...,
        3.40522134e-03, 3.40522134e-03, 3.40522134e-03],
       [2.55391600e-03, 2.83768445e-04, 5.67536890e-04, ...,
        5.67536890e-04, 5.67536890e-04, 5.67536890e-04]])

critical = 26945.271, chi2_test_statistic = 29127.626
Dependent (reject H0)

significance = 0.050, p = 0.000
Dependent (reject H0)


isprofit,0,1
genre,Unnamed: 1_level_1,Unnamed: 2_level_1
0,9,0
Action,426,368
Adventure,101,122
Animation,64,103
Biography,108,85
Comedy,474,600
Crime,140,122
Drama,327,262
Family,3,0
Fantasy,5,14


Observed Values


array([[  9,   0],
       [426, 368],
       [101, 122],
       [ 64, 103],
       [108,  85],
       [474, 600],
       [140, 122],
       [327, 262],
       [  3,   0],
       [  5,  14],
       [ 48, 100],
       [  9,  15],
       [  2,   2],
       [  4,   4],
       [  4,   2],
       [  1,   0]], dtype=int64)

chi2_test_statistic, p, dof, expected


77.43533448482708

2.0497069292208742e-10

15

array([[4.40550511e+00, 4.59449489e+00],
       [3.88663451e+02, 4.05336549e+02],
       [1.09158627e+02, 1.13841373e+02],
       [8.17465948e+01, 8.52534052e+01],
       [9.44736095e+01, 9.85263905e+01],
       [5.25723610e+02, 5.48276390e+02],
       [1.28249149e+02, 1.33750851e+02],
       [2.88315834e+02, 3.00684166e+02],
       [1.46850170e+00, 1.53149830e+00],
       [9.30051078e+00, 9.69948922e+00],
       [7.24460840e+01, 7.55539160e+01],
       [1.17480136e+01, 1.22519864e+01],
       [1.95800227e+00, 2.04199773e+00],
       [3.91600454e+00, 4.08399546e+00],
       [2.93700341e+00, 3.06299659e+00],
       [4.89500568e-01, 5.10499432e-01]])

critical = 24.996, chi2_test_statistic = 77.435
Dependent (reject H0)

significance = 0.050, p = 0.000
Dependent (reject H0)


country,Other,USA
company,Unnamed: 1_level_1,Unnamed: 2_level_1
Castle Rock Entertainment,2,35
Columbia Pictures,7,81
Columbia Pictures Corporation,6,111
Dimension Films,0,34
DreamWorks,2,59
Fox 2000 Pictures,1,45
Fox Searchlight Pictures,5,30
Hollywood Pictures,1,30
Metro-Goldwyn-Mayer (MGM),6,54
Miramax,7,38


Observed Values


array([[   2,   35],
       [   7,   81],
       [   6,  111],
       [   0,   34],
       [   2,   59],
       [   1,   45],
       [   5,   30],
       [   1,   30],
       [   6,   54],
       [   7,   38],
       [  10,  109],
       [ 373, 1504],
       [   9,  168],
       [   0,   32],
       [   1,   30],
       [   6,   70],
       [   1,   40],
       [   9,  119],
       [  27,  181],
       [   4,   65],
       [  20,  192]], dtype=int64)

chi2_test_statistic, p, dof, expected


128.8536092427827

6.393766101906721e-18

20

array([[   5.21821793,   31.78178207],
       [  12.41089671,   75.58910329],
       [  16.50085131,  100.49914869],
       [   4.79511918,   29.20488082],
       [   8.60300795,   52.39699205],
       [   6.48751419,   39.51248581],
       [   4.9361521 ,   30.0638479 ],
       [   4.37202043,   26.62797957],
       [   8.46197503,   51.53802497],
       [   6.34648127,   38.65351873],
       [  16.78291714,  102.21708286],
       [ 264.71878547, 1612.28121453],
       [  24.96282633,  152.03717367],
       [   4.51305335,   27.48694665],
       [   4.37202043,   26.62797957],
       [  10.7185017 ,   65.2814983 ],
       [   5.7823496 ,   35.2176504 ],
       [  18.05221339,  109.94778661],
       [  29.33484677,  178.66515323],
       [   9.73127128,   59.26872872],
       [  29.89897843,  182.10102157]])

critical = 31.410, chi2_test_statistic = 128.854
Dependent (reject H0)

significance = 0.050, p = 0.000
Dependent (reject H0)


director,Barry Levinson,Bruce Beresford,Clint Eastwood,Dennis Dugan,Martin Scorsese,Michael Apted,Oliver Stone,Other,Renny Harlin,Richard Donner,...,Ridley Scott,Rob Reiner,Robert Zemeckis,Ron Howard,Spike Lee,Steven Soderbergh,Steven Spielberg,Tim Burton,Wes Craven,Woody Allen
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Castle Rock Entertainment,0,0,1,0,0,1,0,28,0,0,...,0,5,1,0,0,0,0,0,0,0
Columbia Pictures,0,0,0,5,0,0,0,80,0,0,...,0,0,0,2,0,0,1,0,0,0
Columbia Pictures Corporation,0,0,0,1,0,1,0,109,0,1,...,1,2,0,0,1,0,0,1,0,0
Dimension Films,0,0,0,0,0,0,0,30,0,0,...,0,0,0,0,0,0,0,0,4,0
DreamWorks,0,0,1,0,0,0,0,46,0,0,...,1,0,1,0,0,0,7,0,1,4
Fox 2000 Pictures,0,0,0,0,0,2,0,43,0,0,...,1,0,0,0,0,0,0,0,0,0
Fox Searchlight Pictures,0,0,0,0,0,0,0,34,0,0,...,0,0,0,0,1,0,0,0,0,0
Hollywood Pictures,0,0,0,0,0,0,0,30,0,0,...,1,0,0,0,0,0,0,0,0,0
Metro-Goldwyn-Mayer (MGM),0,1,0,0,0,0,0,57,0,0,...,1,0,0,1,0,0,0,0,0,0
Miramax,0,0,0,0,1,0,0,41,0,0,...,0,0,0,0,0,1,0,0,0,2


Observed Values


array([[   0,    0,    1,    0,    0,    1,    0,   28,    0,    0,    1,
           0,    5,    1,    0,    0,    0,    0,    0,    0,    0],
       [   0,    0,    0,    5,    0,    0,    0,   80,    0,    0,    0,
           0,    0,    0,    2,    0,    0,    1,    0,    0,    0],
       [   0,    0,    0,    1,    0,    1,    0,  109,    0,    1,    0,
           1,    2,    0,    0,    1,    0,    0,    1,    0,    0],
       [   0,    0,    0,    0,    0,    0,    0,   30,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    4,    0],
       [   0,    0,    1,    0,    0,    0,    0,   46,    0,    0,    0,
           1,    0,    1,    0,    0,    0,    7,    0,    1,    4],
       [   0,    0,    0,    0,    0,    2,    0,   43,    0,    0,    0,
           1,    0,    0,    0,    0,    0,    0,    0,    0,    0],
       [   0,    0,    0,    0,    0,    0,    0,   34,    0,    0,    0,
           0,    0,    0,    0,    1,    0,    0,    0,    0,    0],

chi2_test_statistic, p, dof, expected


1089.9807821717773

2.8563531972245992e-65

400

array([[1.25993190e-01, 1.04994325e-01, 2.09988649e-01, 1.15493757e-01,
        1.04994325e-01, 1.04994325e-01, 1.46992054e-01, 3.40286606e+01,
        1.25993190e-01, 1.15493757e-01, 1.36492622e-01, 1.57491487e-01,
        1.36492622e-01, 1.25993190e-01, 1.78490352e-01, 1.57491487e-01,
        1.78490352e-01, 1.99489217e-01, 1.25993190e-01, 1.25993190e-01,
        2.93984109e-01],
       [2.99659478e-01, 2.49716232e-01, 4.99432463e-01, 2.74687855e-01,
        2.49716232e-01, 2.49716232e-01, 3.49602724e-01, 8.09330306e+01,
        2.99659478e-01, 2.74687855e-01, 3.24631101e-01, 3.74574347e-01,
        3.24631101e-01, 2.99659478e-01, 4.24517594e-01, 3.74574347e-01,
        4.24517594e-01, 4.74460840e-01, 2.99659478e-01, 2.99659478e-01,
        6.99205448e-01],
       [3.98410897e-01, 3.32009081e-01, 6.64018161e-01, 3.65209989e-01,
        3.32009081e-01, 3.32009081e-01, 4.64812713e-01, 1.07604143e+02,
        3.98410897e-01, 3.65209989e-01, 4.31611805e-01, 4.98013621e-01,
        4.3161

critical = 447.632, chi2_test_statistic = 1089.981
Dependent (reject H0)

significance = 0.050, p = 0.000
Dependent (reject H0)


rating,0,G,NC-17,NOT RATED,Not specified,PG,PG-13,R,UNRATED
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Castle Rock Entertainment,0,1,0,0,0,5,15,16,0
Columbia Pictures,0,0,0,0,0,20,43,25,0
Columbia Pictures Corporation,0,0,0,0,0,10,54,53,0
Dimension Films,0,0,0,0,0,1,10,23,0
DreamWorks,0,0,0,0,0,6,33,22,0
Fox 2000 Pictures,0,2,0,0,0,18,18,8,0
Fox Searchlight Pictures,0,0,1,0,0,2,6,26,0
Hollywood Pictures,0,0,0,0,0,7,10,14,0
Metro-Goldwyn-Mayer (MGM),0,0,0,0,0,11,28,21,0
Miramax,0,0,0,0,0,4,11,30,0


Observed Values


array([[   0,    1,    0,    0,    0,    5,   15,   16,    0],
       [   0,    0,    0,    0,    0,   20,   43,   25,    0],
       [   0,    0,    0,    0,    0,   10,   54,   53,    0],
       [   0,    0,    0,    0,    0,    1,   10,   23,    0],
       [   0,    0,    0,    0,    0,    6,   33,   22,    0],
       [   0,    2,    0,    0,    0,   18,   18,    8,    0],
       [   0,    0,    1,    0,    0,    2,    6,   26,    0],
       [   0,    0,    0,    0,    0,    7,   10,   14,    0],
       [   0,    0,    0,    0,    0,   11,   28,   21,    0],
       [   0,    0,    0,    0,    0,    4,   11,   30,    0],
       [   0,    1,    0,    0,    0,    7,   43,   68,    0],
       [   9,   40,    4,   24,    3,  239,  492, 1053,   13],
       [   0,    3,    0,    0,    0,   24,   85,   65,    0],
       [   0,    0,    0,    0,    0,    0,   20,   12,    0],
       [   0,    0,    0,    0,    0,    2,   23,    6,    0],
       [   0,    1,    0,    0,    0,   16,   38,   21,

chi2_test_statistic, p, dof, expected


731.5286210479003

6.267700202785287e-74

160

array([[9.44948922e-02, 7.24460840e-01, 5.24971623e-02, 2.51986379e-01,
        3.14982974e-02, 5.41770715e+00, 1.26203178e+01, 1.76705448e+01,
        1.36492622e-01],
       [2.24744608e-01, 1.72304200e+00, 1.24858116e-01, 5.99318956e-01,
        7.49148695e-02, 1.28853575e+01, 3.00158910e+01, 4.20272418e+01,
        3.24631101e-01],
       [2.98808173e-01, 2.29086266e+00, 1.66004540e-01, 7.96821793e-01,
        9.96027242e-02, 1.71316686e+01, 3.99074915e+01, 5.58771283e+01,
        4.31611805e-01],
       [8.68331442e-02, 6.65720772e-01, 4.82406356e-02, 2.31555051e-01,
        2.89443814e-02, 4.97843360e+00, 1.15970488e+01, 1.62377980e+01,
        1.25425653e-01],
       [1.55788876e-01, 1.19438138e+00, 8.65493757e-02, 4.15437003e-01,
        5.19296254e-02, 8.93189557e+00, 2.08064699e+01, 2.91325199e+01,
        2.25028377e-01],
       [1.17480136e-01, 9.00681044e-01, 6.52667423e-02, 3.13280363e-01,
        3.91600454e-02, 6.73552781e+00, 1.56901249e+01, 2.19687855e+01,
        1.6

critical = 190.516, chi2_test_statistic = 731.529
Dependent (reject H0)

significance = 0.050, p = 0.000
Dependent (reject H0)


released,0,1.01.2004 00:00,1.02.1991 00:00,1.02.2002 00:00,1.02.2008 00:00,1.02.2013 00:00,1.03.1991 00:00,1.03.1996 00:00,1.03.2002 00:00,1.03.2013 00:00,...,9.11.1994 00:00,9.11.2001 00:00,9.11.2005 00:00,9.11.2007 00:00,9.11.2012 00:00,9.12.1988 00:00,9.12.1994 00:00,9.12.2005 00:00,9.12.2011 00:00,9.12.2016 00:00
company,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Castle Rock Entertainment,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Columbia Pictures,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Columbia Pictures Corporation,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Dimension Films,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
DreamWorks,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Fox 2000 Pictures,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Fox Searchlight Pictures,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
Hollywood Pictures,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Metro-Goldwyn-Mayer (MGM),0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
Miramax,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0


Observed Values


array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 1, 0, 0]], dtype=int64)

chi2_test_statistic, p, dof, expected


33096.17975178755

1.0

35420

array([[0.09449489, 0.01049943, 0.02099886, ..., 0.02099886, 0.02099886,
        0.02099886],
       [0.22474461, 0.02497162, 0.04994325, ..., 0.04994325, 0.04994325,
        0.04994325],
       [0.29880817, 0.03320091, 0.06640182, ..., 0.06640182, 0.06640182,
        0.06640182],
       ...,
       [0.53121453, 0.05902384, 0.11804767, ..., 0.11804767, 0.11804767,
        0.11804767],
       [0.1762202 , 0.01958002, 0.03916005, ..., 0.03916005, 0.03916005,
        0.03916005],
       [0.54143019, 0.06015891, 0.12031782, ..., 0.12031782, 0.12031782,
        0.12031782]])

critical = 35858.925, chi2_test_statistic = 33096.180
Independent (fail to reject H0)

significance = 0.050, p = 1.000
Independent (fail to reject H0)


isprofit,0,1
company,Unnamed: 1_level_1,Unnamed: 2_level_1
Castle Rock Entertainment,18,19
Columbia Pictures,33,55
Columbia Pictures Corporation,58,59
Dimension Films,12,22
DreamWorks,29,32
Fox 2000 Pictures,18,28
Fox Searchlight Pictures,11,24
Hollywood Pictures,13,18
Metro-Goldwyn-Mayer (MGM),36,24
Miramax,21,24


Observed Values


array([[  18,   19],
       [  33,   55],
       [  58,   59],
       [  12,   22],
       [  29,   32],
       [  18,   28],
       [  11,   24],
       [  13,   18],
       [  36,   24],
       [  21,   24],
       [  42,   77],
       [1044,  833],
       [  59,  118],
       [  10,   22],
       [  11,   20],
       [  26,   50],
       [  17,   24],
       [  53,   75],
       [  90,  118],
       [  19,   50],
       [ 105,  107]], dtype=int64)

chi2_test_statistic, p, dof, expected


108.59106965831063

3.54619926405531e-14

20

array([[ 18.111521  ,  18.888479  ],
       [ 43.07604994,  44.92395006],
       [ 57.2715664 ,  59.7284336 ],
       [ 16.6430193 ,  17.3569807 ],
       [ 29.85953462,  31.14046538],
       [ 22.51702611,  23.48297389],
       [ 17.13251986,  17.86748014],
       [ 15.17451759,  15.82548241],
       [ 29.37003405,  30.62996595],
       [ 22.02752554,  22.97247446],
       [ 58.25056754,  60.74943246],
       [918.79256527, 958.20743473],
       [ 86.64160045,  90.35839955],
       [ 15.66401816,  16.33598184],
       [ 15.17451759,  15.82548241],
       [ 37.20204313,  38.79795687],
       [ 20.06952327,  20.93047673],
       [ 62.65607264,  65.34392736],
       [101.81611805, 106.18388195],
       [ 33.77553916,  35.22446084],
       [103.77412032, 108.22587968]])

critical = 31.410, chi2_test_statistic = 108.591
Dependent (reject H0)

significance = 0.050, p = 0.000
Dependent (reject H0)


director,Barry Levinson,Bruce Beresford,Clint Eastwood,Dennis Dugan,Martin Scorsese,Michael Apted,Oliver Stone,Other,Renny Harlin,Richard Donner,...,Ridley Scott,Rob Reiner,Robert Zemeckis,Ron Howard,Spike Lee,Steven Soderbergh,Steven Spielberg,Tim Burton,Wes Craven,Woody Allen
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Other,0,4,1,0,2,1,4,472,0,1,...,2,0,1,0,0,2,1,1,0,4
USA,12,6,19,11,8,9,10,2769,12,10,...,13,13,11,17,15,15,18,11,12,24


Observed Values


array([[   0,    4,    1,    0,    2,    1,    4,  472,    0,    1,    1,
           2,    0,    1,    0,    0,    2,    1,    1,    0,    4],
       [  12,    6,   19,   11,    8,    9,   10, 2769,   12,   10,   12,
          13,   13,   11,   17,   15,   15,   18,   11,   12,   24]],
      dtype=int64)

chi2_test_statistic, p, dof, expected


28.06027075879496

0.10798029507256064

20

array([[1.69239501e+00, 1.41032917e+00, 2.82065834e+00, 1.55136209e+00,
        1.41032917e+00, 1.41032917e+00, 1.97446084e+00, 4.57087684e+02,
        1.69239501e+00, 1.55136209e+00, 1.83342792e+00, 2.11549376e+00,
        1.83342792e+00, 1.69239501e+00, 2.39755959e+00, 2.11549376e+00,
        2.39755959e+00, 2.67962543e+00, 1.69239501e+00, 1.69239501e+00,
        3.94892168e+00],
       [1.03076050e+01, 8.58967083e+00, 1.71793417e+01, 9.44863791e+00,
        8.58967083e+00, 8.58967083e+00, 1.20255392e+01, 2.78391232e+03,
        1.03076050e+01, 9.44863791e+00, 1.11665721e+01, 1.28845062e+01,
        1.11665721e+01, 1.03076050e+01, 1.46024404e+01, 1.28845062e+01,
        1.46024404e+01, 1.63203746e+01, 1.03076050e+01, 1.03076050e+01,
        2.40510783e+01]])

critical = 31.410, chi2_test_statistic = 28.060
Independent (fail to reject H0)

significance = 0.050, p = 0.108
Independent (fail to reject H0)


rating,0,G,NC-17,NOT RATED,Not specified,PG,PG-13,R,UNRATED
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Other,9,13,1,13,2,61,141,253,4
USA,0,56,4,11,1,455,1061,1430,9


Observed Values


array([[   9,   13,    1,   13,    2,   61,  141,  253,    4],
       [   0,   56,    4,   11,    1,  455, 1061, 1430,    9]],
      dtype=int64)

chi2_test_statistic, p, dof, expected


106.86195191438502

1.6790128058106214e-19

8

array([[1.26929625e+00, 9.73127128e+00, 7.05164586e-01, 3.38479001e+00,
        4.23098751e-01, 7.27729852e+01, 1.69521566e+02, 2.37358400e+02,
        1.83342792e+00],
       [7.73070375e+00, 5.92687287e+01, 4.29483541e+00, 2.06152100e+01,
        2.57690125e+00, 4.43227015e+02, 1.03247843e+03, 1.44564160e+03,
        1.11665721e+01]])

critical = 15.507, chi2_test_statistic = 106.862
Dependent (reject H0)

significance = 0.050, p = 0.000
Dependent (reject H0)


released,0,1.01.2004 00:00,1.02.1991 00:00,1.02.2002 00:00,1.02.2008 00:00,1.02.2013 00:00,1.03.1991 00:00,1.03.1996 00:00,1.03.2002 00:00,1.03.2013 00:00,...,9.11.1994 00:00,9.11.2001 00:00,9.11.2005 00:00,9.11.2007 00:00,9.11.2012 00:00,9.12.1988 00:00,9.12.1994 00:00,9.12.2005 00:00,9.12.2011 00:00,9.12.2016 00:00
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Other,9,1,0,2,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
USA,0,0,2,0,2,3,1,2,3,2,...,1,2,1,3,0,1,2,2,2,2


Observed Values


array([[9, 1, 0, ..., 0, 0, 0],
       [0, 0, 2, ..., 2, 2, 2]], dtype=int64)

chi2_test_statistic, p, dof, expected


1736.2411953179583

0.7179228733334809

1771

array([[1.26929625, 0.14103292, 0.28206583, ..., 0.28206583, 0.28206583,
        0.28206583],
       [7.73070375, 0.85896708, 1.71793417, ..., 1.71793417, 1.71793417,
        1.71793417]])

critical = 1870.017, chi2_test_statistic = 1736.241
Independent (fail to reject H0)

significance = 0.050, p = 0.718
Independent (fail to reject H0)


isprofit,0,1
country,Unnamed: 1_level_1,Unnamed: 2_level_1
Other,330,167
USA,1395,1632


Observed Values


array([[ 330,  167],
       [1395, 1632]], dtype=int64)

chi2_test_statistic, p, dof, expected


69.68137484569843

6.970116776098656e-17

1

array([[ 243.28178207,  253.71821793],
       [1481.71821793, 1545.28178207]])

critical = 3.841, chi2_test_statistic = 69.681
Dependent (reject H0)

significance = 0.050, p = 0.000
Dependent (reject H0)


rating,0,G,NC-17,NOT RATED,Not specified,PG,PG-13,R,UNRATED
director,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Barry Levinson,0,0,0,0,0,0,2,10,0
Bruce Beresford,0,0,0,0,0,2,3,5,0
Clint Eastwood,0,0,0,0,0,1,7,12,0
Dennis Dugan,0,0,0,0,0,2,9,0,0
Martin Scorsese,0,0,0,0,0,1,1,8,0
Michael Apted,0,0,0,0,0,2,4,4,0
Oliver Stone,0,0,0,0,0,0,3,11,0
Other,9,68,5,23,3,484,1097,1539,13
Renny Harlin,0,0,0,0,0,0,4,8,0
Richard Donner,0,0,0,0,0,1,4,6,0


Observed Values


array([[   0,    0,    0,    0,    0,    0,    2,   10,    0],
       [   0,    0,    0,    0,    0,    2,    3,    5,    0],
       [   0,    0,    0,    0,    0,    1,    7,   12,    0],
       [   0,    0,    0,    0,    0,    2,    9,    0,    0],
       [   0,    0,    0,    0,    0,    1,    1,    8,    0],
       [   0,    0,    0,    0,    0,    2,    4,    4,    0],
       [   0,    0,    0,    0,    0,    0,    3,   11,    0],
       [   9,   68,    5,   23,    3,  484, 1097, 1539,   13],
       [   0,    0,    0,    0,    0,    0,    4,    8,    0],
       [   0,    0,    0,    0,    0,    1,    4,    6,    0],
       [   0,    0,    0,    0,    0,    0,    4,    9,    0],
       [   0,    0,    0,    0,    0,    0,    5,   10,    0],
       [   0,    0,    0,    0,    0,    2,    6,    5,    0],
       [   0,    1,    0,    0,    0,    5,    4,    2,    0],
       [   0,    0,    0,    0,    0,    3,   10,    4,    0],
       [   0,    0,    0,    0,    0,    0,    1,   14,

chi2_test_statistic, p, dof, expected


109.42592092116308

0.9992045661618998

160

array([[3.06469921e-02, 2.34960272e-01, 1.70261067e-02, 8.17253121e-02,
        1.02156640e-02, 1.75709421e+00, 4.09307605e+00, 5.73098751e+00,
        4.42678774e-02],
       [2.55391600e-02, 1.95800227e-01, 1.41884222e-02, 6.81044268e-02,
        8.51305335e-03, 1.46424518e+00, 3.41089671e+00, 4.77582293e+00,
        3.68898978e-02],
       [5.10783201e-02, 3.91600454e-01, 2.83768445e-02, 1.36208854e-01,
        1.70261067e-02, 2.92849035e+00, 6.82179342e+00, 9.55164586e+00,
        7.37797957e-02],
       [2.80930760e-02, 2.15380250e-01, 1.56072645e-02, 7.49148695e-02,
        9.36435868e-03, 1.61066969e+00, 3.75198638e+00, 5.25340522e+00,
        4.05788876e-02],
       [2.55391600e-02, 1.95800227e-01, 1.41884222e-02, 6.81044268e-02,
        8.51305335e-03, 1.46424518e+00, 3.41089671e+00, 4.77582293e+00,
        3.68898978e-02],
       [2.55391600e-02, 1.95800227e-01, 1.41884222e-02, 6.81044268e-02,
        8.51305335e-03, 1.46424518e+00, 3.41089671e+00, 4.77582293e+00,
        3.6

critical = 190.516, chi2_test_statistic = 109.426
Independent (fail to reject H0)

significance = 0.050, p = 0.999
Independent (fail to reject H0)


released,0,1.01.2004 00:00,1.02.1991 00:00,1.02.2002 00:00,1.02.2008 00:00,1.02.2013 00:00,1.03.1991 00:00,1.03.1996 00:00,1.03.2002 00:00,1.03.2013 00:00,...,9.11.1994 00:00,9.11.2001 00:00,9.11.2005 00:00,9.11.2007 00:00,9.11.2012 00:00,9.12.1988 00:00,9.12.1994 00:00,9.12.2005 00:00,9.12.2011 00:00,9.12.2016 00:00
director,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Barry Levinson,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
Bruce Beresford,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Clint Eastwood,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Dennis Dugan,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Martin Scorsese,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Michael Apted,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Oliver Stone,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Other,9,1,2,2,2,3,0,2,3,2,...,1,2,1,3,1,1,1,2,2,2
Renny Harlin,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Richard Donner,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Observed Values


array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

chi2_test_statistic, p, dof, expected


36241.10439846513

0.0010919718939522557

35420

array([[0.03064699, 0.00340522, 0.00681044, ..., 0.00681044, 0.00681044,
        0.00681044],
       [0.02553916, 0.00283768, 0.00567537, ..., 0.00567537, 0.00567537,
        0.00567537],
       [0.05107832, 0.00567537, 0.01135074, ..., 0.01135074, 0.01135074,
        0.01135074],
       ...,
       [0.03064699, 0.00340522, 0.00681044, ..., 0.00681044, 0.00681044,
        0.00681044],
       [0.03064699, 0.00340522, 0.00681044, ..., 0.00681044, 0.00681044,
        0.00681044],
       [0.07150965, 0.00794552, 0.01589103, ..., 0.01589103, 0.01589103,
        0.01589103]])

critical = 35858.925, chi2_test_statistic = 36241.104
Dependent (reject H0)

significance = 0.050, p = 0.001
Dependent (reject H0)


isprofit,0,1
director,Unnamed: 1_level_1,Unnamed: 2_level_1
Barry Levinson,5,7
Bruce Beresford,7,3
Clint Eastwood,8,12
Dennis Dugan,2,9
Martin Scorsese,5,5
Michael Apted,4,6
Oliver Stone,6,8
Other,1595,1646
Renny Harlin,8,4
Richard Donner,5,6


Observed Values


array([[   5,    7],
       [   7,    3],
       [   8,   12],
       [   2,    9],
       [   5,    5],
       [   4,    6],
       [   6,    8],
       [1595, 1646],
       [   8,    4],
       [   5,    6],
       [   4,    9],
       [   9,    6],
       [   6,    7],
       [   2,   10],
       [   8,    9],
       [   8,    7],
       [   7,   10],
       [   5,   14],
       [   5,    7],
       [   4,    8],
       [  22,    6]], dtype=int64)

chi2_test_statistic, p, dof, expected


32.22157475338656

0.04099378720033222

20

array([[   5.87400681,    6.12599319],
       [   4.89500568,    5.10499432],
       [   9.79001135,   10.20998865],
       [   5.38450624,    5.61549376],
       [   4.89500568,    5.10499432],
       [   4.89500568,    5.10499432],
       [   6.85300795,    7.14699205],
       [1586.47133939, 1654.52866061],
       [   5.87400681,    6.12599319],
       [   5.38450624,    5.61549376],
       [   6.36350738,    6.63649262],
       [   7.34250851,    7.65749149],
       [   6.36350738,    6.63649262],
       [   5.87400681,    6.12599319],
       [   8.32150965,    8.67849035],
       [   7.34250851,    7.65749149],
       [   8.32150965,    8.67849035],
       [   9.30051078,    9.69948922],
       [   5.87400681,    6.12599319],
       [   5.87400681,    6.12599319],
       [  13.70601589,   14.29398411]])

critical = 31.410, chi2_test_statistic = 32.222
Dependent (reject H0)

significance = 0.050, p = 0.041
Dependent (reject H0)


released,0,1.01.2004 00:00,1.02.1991 00:00,1.02.2002 00:00,1.02.2008 00:00,1.02.2013 00:00,1.03.1991 00:00,1.03.1996 00:00,1.03.2002 00:00,1.03.2013 00:00,...,9.11.1994 00:00,9.11.2001 00:00,9.11.2005 00:00,9.11.2007 00:00,9.11.2012 00:00,9.12.1988 00:00,9.12.1994 00:00,9.12.2005 00:00,9.12.2011 00:00,9.12.2016 00:00
rating,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
G,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
NC-17,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
NOT RATED,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Not specified,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
PG,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,1,0,1,0,0
PG-13,0,1,0,0,1,1,0,2,0,1,...,0,1,0,0,1,0,0,0,0,0
R,0,0,2,2,1,2,1,0,3,1,...,1,1,1,2,0,0,2,1,2,2
UNRATED,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Observed Values


array([[9, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 1, 0, ..., 0, 0, 0],
       [0, 0, 2, ..., 1, 2, 2],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

chi2_test_statistic, p, dof, expected


16264.548918097264

6.47638321973948e-33

14168

array([[2.29852440e-02, 2.55391600e-03, 5.10783201e-03, ...,
        5.10783201e-03, 5.10783201e-03, 5.10783201e-03],
       [1.76220204e-01, 1.95800227e-02, 3.91600454e-02, ...,
        3.91600454e-02, 3.91600454e-02, 3.91600454e-02],
       [1.27695800e-02, 1.41884222e-03, 2.83768445e-03, ...,
        2.83768445e-03, 2.83768445e-03, 2.83768445e-03],
       ...,
       [3.06980704e+00, 3.41089671e-01, 6.82179342e-01, ...,
        6.82179342e-01, 6.82179342e-01, 6.82179342e-01],
       [4.29824064e+00, 4.77582293e-01, 9.55164586e-01, ...,
        9.55164586e-01, 9.55164586e-01, 9.55164586e-01],
       [3.32009081e-02, 3.68898978e-03, 7.37797957e-03, ...,
        7.37797957e-03, 7.37797957e-03, 7.37797957e-03]])

critical = 14446.016, chi2_test_statistic = 16264.549
Dependent (reject H0)

significance = 0.050, p = 0.000
Dependent (reject H0)


isprofit,0,1
rating,Unnamed: 1_level_1,Unnamed: 2_level_1
0,9,0
G,26,43
NC-17,3,2
NOT RATED,16,8
Not specified,2,1
PG,213,303
PG-13,561,641
R,889,794
UNRATED,6,7


Observed Values


array([[  9,   0],
       [ 26,  43],
       [  3,   2],
       [ 16,   8],
       [  2,   1],
       [213, 303],
       [561, 641],
       [889, 794],
       [  6,   7]], dtype=int64)

chi2_test_statistic, p, dof, expected


41.314212130230835

1.820693238192019e-06

8

array([[  4.40550511,   4.59449489],
       [ 33.77553916,  35.22446084],
       [  2.44750284,   2.55249716],
       [ 11.74801362,  12.25198638],
       [  1.4685017 ,   1.5314983 ],
       [252.58229285, 263.41770715],
       [588.37968218, 613.62031782],
       [823.82945516, 859.17054484],
       [  6.36350738,   6.63649262]])

critical = 15.507, chi2_test_statistic = 41.314
Dependent (reject H0)

significance = 0.050, p = 0.000
Dependent (reject H0)


isprofit,0,1
released,Unnamed: 1_level_1,Unnamed: 2_level_1
0,9,0
1.01.2004 00:00,0,1
1.02.1991 00:00,2,0
1.02.2002 00:00,2,0
1.02.2008 00:00,2,0
...,...,...
9.12.1988 00:00,0,1
9.12.1994 00:00,1,1
9.12.2005 00:00,0,2
9.12.2011 00:00,1,1


Observed Values


array([[9, 0],
       [0, 1],
       [2, 0],
       ...,
       [0, 2],
       [1, 1],
       [1, 1]], dtype=int64)

chi2_test_statistic, p, dof, expected


1803.4698996751126

0.28997295194919154

1771

array([[4.40550511, 4.59449489],
       [0.48950057, 0.51049943],
       [0.97900114, 1.02099886],
       ...,
       [0.97900114, 1.02099886],
       [0.97900114, 1.02099886],
       [0.97900114, 1.02099886]])

critical = 1870.017, chi2_test_statistic = 1803.470
Independent (fail to reject H0)

significance = 0.050, p = 0.290
Independent (fail to reject H0)
