In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from scipy.special import expit

In [2]:
aggrements = pd.read_csv("agreements.csv")

In [3]:
(aggrements
 .groupby("call")
 .mean())

Unnamed: 0_level_0,agreements
call,Unnamed: 1_level_1
0,0.505792
1,0.497925


## Running an Experiment
### T-Test

In [4]:
from scipy import stats

results = stats.ttest_ind(
    aggrements.query("call==0"),
    aggrements.query("call==1"),
    equal_var=False
)

results.pvalue

array([0.       , 0.8039154])

### Linear Regression

In [5]:
import statsmodels.formula.api as smf

results = smf.ols('agreements ~ call', data=aggrements).fit()

results.summary()

0,1,2,3
Dep. Variable:,agreements,R-squared:,0.0
Model:,OLS,Adj. R-squared:,-0.001
Method:,Least Squares,F-statistic:,0.06168
Date:,"Mon, 18 Nov 2019",Prob (F-statistic):,0.804
Time:,20:52:41,Log-Likelihood:,-725.75
No. Observations:,1000,AIC:,1456.0
Df Residuals:,998,BIC:,1465.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.5058,0.022,23.001,0.000,0.463,0.549
call,-0.0079,0.032,-0.248,0.804,-0.070,0.054

0,1,2,3
Omnibus:,3764.159,Durbin-Watson:,1.973
Prob(Omnibus):,0.0,Jarque-Bera (JB):,166.625
Skew:,-0.008,Prob(JB):,6.570000000000001e-37
Kurtosis:,1.0,Cond. No.,2.58


In [6]:
aggrements.corr()

Unnamed: 0,call,agreements
call,1.0,-0.007861
agreements,-0.007861,1.0


## Exercise


```
WAGE1.DES

wage      educ      exper     tenure    nonwhite  female    married   numdep   
smsa      northcen  south     west      construc  ndurman   trcommpu  trade    
services  profserv  profocc   clerocc   servocc   lwage     expersq   tenursq  

  Obs:   526

  1. wage                     average hourly earnings
  2. educ                     years of education
  3. exper                    years potential experience
  4. tenure                   years with current employer
  5. nonwhite                 =1 if nonwhite
  6. female                   =1 if female
  7. married                  =1 if married
  8. numdep                   number of dependents
  9. smsa                     =1 if live in SMSA
 10. northcen                 =1 if live in north central U.S
 11. south                    =1 if live in southern region
 12. west                     =1 if live in western region
 13. construc                 =1 if work in construc. indus.
 14. ndurman                  =1 if in nondur. manuf. indus.
 15. trcommpu                 =1 if in trans, commun, pub ut
 16. trade                    =1 if in wholesale or retail
 17. services                 =1 if in services indus.
 18. profserv                 =1 if in prof. serv. indus.
 19. profocc                  =1 if in profess. occupation
 20. clerocc                  =1 if in clerical occupation
 21. servocc                  =1 if in service occupation
 22. lwage                    log(wage)
 23. expersq                  exper^2
 24. tenursq                  tenure^2
```

In [7]:
wage = pd.read_stata("WAGE1.DTA")

In [8]:
results = smf.ols('wage ~ educ', data=wage).fit()

In [9]:
results.summary()

0,1,2,3
Dep. Variable:,wage,R-squared:,0.165
Model:,OLS,Adj. R-squared:,0.163
Method:,Least Squares,F-statistic:,103.4
Date:,"Mon, 18 Nov 2019",Prob (F-statistic):,2.78e-22
Time:,20:52:41,Log-Likelihood:,-1385.7
No. Observations:,526,AIC:,2775.0
Df Residuals:,524,BIC:,2784.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.9049,0.685,-1.321,0.187,-2.250,0.441
educ,0.5414,0.053,10.167,0.000,0.437,0.646

0,1,2,3
Omnibus:,212.554,Durbin-Watson:,1.824
Prob(Omnibus):,0.0,Jarque-Bera (JB):,807.843
Skew:,1.861,Prob(JB):,3.79e-176
Kurtosis:,7.797,Cond. No.,60.2
