In [50]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as st
import pingouin as pg

import statsmodels.api as sm
from statsmodels.formula.api import ols
import statsmodels.stats.outliers_influence as sms
from statsmodels.tools.tools import add_constant

from patsy.contrasts import ContrastMatrix
from patsy.contrasts import Poly
%matplotlib inline

In [51]:
# Load the raw dataset from 'gats_fes.csv' (path is relative to the working directory).
data = pd.read_csv('gats_fes.csv')

In [52]:
# Numeric code for each music-affiliation label. 'No Musical Affiliation' gets
# the lowest code (1).
# Series.map is used rather than Series.replace: replacing strings with ints
# relies on implicit downcasting of an object column, which pandas 2.2
# deprecates (FutureWarning). With map, a label missing from the mapping
# becomes NaN instead of passing through as a string in a numeric column.
data['musicx'] = data['music'].map({
    'No Musical Affiliation': 1,
    'Crusty': 2,
    'Indie Kid': 3,
    'Metaller': 4,
})

In [53]:
# Drop the rows whose 'change' entry is the single-space placeholder string and
# renumber the remaining rows from 0 (the old index is discarded).
has_change = data['change'] != ' '
data = data[has_change].reset_index(drop=True)
del has_change

In [54]:
# Parse the 'change' column as numbers; all other columns are left as they are.
data = data.assign(change=pd.to_numeric(data['change']))

In [55]:
# OLS fit of 'change' on music group, with musicx wrapped in C() so it is
# treated as a categorical predictor. The summary is the cell's displayed value.
m01 = ols(
    formula='change~C(musicx)',
    data=data,
).fit()
m01.summary()

0,1,2,3
Dep. Variable:,change,R-squared:,0.076
Model:,OLS,Adj. R-squared:,0.053
Method:,Least Squares,F-statistic:,3.27
Date:,"Sat, 25 Jul 2020",Prob (F-statistic):,0.0237
Time:,23:42:32,Log-Likelihood:,-126.53
No. Observations:,123,AIC:,261.1
Df Residuals:,119,BIC:,272.3
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.5543,0.090,-6.134,0.000,-0.733,-0.375
C(musicx)[T.2],-0.4115,0.167,-2.464,0.015,-0.742,-0.081
C(musicx)[T.3],-0.4100,0.205,-2.001,0.048,-0.816,-0.004
C(musicx)[T.4],0.0284,0.160,0.177,0.860,-0.289,0.346

0,1,2,3
Omnibus:,0.87,Durbin-Watson:,1.893
Prob(Omnibus):,0.647,Jarque-Bera (JB):,0.881
Skew:,-0.197,Prob(JB):,0.644
Kurtosis:,2.871,Cond. No.,3.96


In [56]:
# Remove columns that none of the following cells reference.
# Rebinding `data` (instead of inplace=True) together with errors='ignore'
# keeps this cell re-runnable: the in-place version raised KeyError when run a
# second time because the columns were already gone.
data = data.drop(columns=['ticknumb', 'day1', 'day2', 'day3'], errors='ignore')

In [57]:
# Display the frame as it stands after the clean-up cells.
data

Unnamed: 0,music,change,musicx
0,Metaller,-1.04,4
1,Crusty,-0.68,2
2,No Musical Affiliation,-0.56,1
3,Crusty,-0.35,2
4,No Musical Affiliation,-0.18,1
...,...,...,...
118,Indie Kid,-0.48,3
119,No Musical Affiliation,0.53,1
120,No Musical Affiliation,-0.12,1
121,Indie Kid,-0.91,3


In [58]:
# One sub-frame per music group code, unpacked in code order (1, 2, 3, 4).
data_1, data_2, data_3, data_4 = (
    data[data['musicx'] == code] for code in (1, 2, 3, 4)
)

In [59]:
# Levene's test on the 'change' values of the four music groups; prints the
# test statistic followed by its p-value.
stat, p = st.levene(
    *(group['change'] for group in (data_1, data_2, data_3, data_4))
)
print(stat, p)

0.7672943020921851 0.5145482212741821


In [60]:
# ANOVA table for the fitted model m01. typ=1 is anova_lm's default, spelled
# out here so the sum-of-squares type is visible to the reader.
anova_table = sm.stats.anova_lm(m01, typ=1)
anova_table

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
C(musicx),3.0,4.646474,1.548825,3.270357,0.023687
Residual,119.0,56.3578,0.473595,,


In [61]:
# The four music group codes, in ascending order.
levels = list(range(1, 5))

In [62]:
def _name_levels(prefix, levels):
    return ["[%s%s]" % (prefix, level) for level in levels]

class Simple:
    """Custom contrast coding usable as the second argument of ``C(...)``.

    For ``k`` levels the coding matrix has ``k`` rows and ``k - 1`` columns.
    Every entry is ``-1/k`` except the entries just below the main diagonal
    (row ``j + 1``, column ``j``), which are ``(k - 1)/k``. The first level's
    row is therefore ``-1/k`` throughout.
    """

    def _simple_contrast(self, levels):
        """Build the ``(k, k - 1)`` coding matrix for ``k = len(levels)``."""
        k = len(levels)
        contr = np.full((k, k - 1), -1. / k)
        # Rows 1..k-1 paired with columns 0..k-2: the sub-diagonal entries.
        rows = np.arange(1, k)
        contr[rows, rows - 1] = (k - 1.) / k
        return contr

    def code_with_intercept(self, levels):
        """Return the coding matrix with a leading column of ones.

        The result has one column per level, labelled with every level.
        """
        ones_column = np.ones(len(levels))
        with_intercept = np.column_stack(
            (ones_column, self._simple_contrast(levels))
        )
        return ContrastMatrix(with_intercept, _name_levels("Simp.", levels))

    def code_without_intercept(self, levels):
        """Return the bare ``(k, k - 1)`` coding matrix.

        NOTE(review): the columns are labelled from ``levels[:-1]``, so with
        levels ``[1, 2, 3, 4]`` the labels are Simp.1..Simp.3 even though each
        column's distinctive entry sits in the row of the *next* level. The
        labels read as contrast numbers rather than level names; confirm that
        this is intended before relying on them.
        """
        return ContrastMatrix(
            self._simple_contrast(levels),
            _name_levels("Simp.", levels[:-1]),
        )

In [63]:
# Build the intercept-free coding matrix for the four levels and print it as a
# sanity check. The model formula further down passes the Simple class itself
# to C(...), not this `contrast` object.
contrast = Simple().code_without_intercept(levels)
print(contrast.matrix)

[[-0.25 -0.25 -0.25]
 [ 0.75 -0.25 -0.25]
 [-0.25  0.75 -0.25]
 [-0.25 -0.25  0.75]]


In [66]:
# Same one-way model as before, but with musicx coded through the custom
# Simple contrast class instead of the default coding.
mod = ols(
    formula="change ~ C(musicx, Simple)",
    data=data,
)
res = mod.fit()
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:                 change   R-squared:                       0.076
Model:                            OLS   Adj. R-squared:                  0.053
Method:                 Least Squares   F-statistic:                     3.270
Date:                Sat, 25 Jul 2020   Prob (F-statistic):             0.0237
Time:                        23:51:04   Log-Likelihood:                -126.53
No. Observations:                 123   AIC:                             261.1
Df Residuals:                     119   BIC:                             272.3
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                                coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------
Intercept             

#### ***The output above shows the results of the contrasts comparing No Music Affiliation with each of the other levels of the variable music using contr.SAS(), and you might notice that it is the same as the output from Chapter 7, Section 7.12.2 above. The first level of the music variable is Crusty, therefore music1 represents the contrast comparing people with no music affiliation with crusties. The beta value represents the difference in the change in hygiene scores for a crusty, relative to someone with no music affiliation. The t-test is significant (p = .015), and the beta value has a negative value (−0.41), so we could say that the change in hygiene scores goes down as a person changes from having no music affiliation to being a crusty. Bear in mind that a decrease in hygiene scores represents greater change (you’re becoming smellier), so what this actually means is that hygiene decreased significantly more in crusties compared to those with no music affiliation. Music2 represents the contrast comparing indie kids to those with no music affiliation. The beta value represents the difference in the change in hygiene scores for an indie kid, relative to someone with no music affiliation. The t-test is again significant (p = .048), and the beta value is also a negative value (−0.41) so, as with the first contrast, we could say that the change in hygiene scores goes down as a person changes from having no music affiliation to being an indie kid. Bear in mind that a decrease in hygiene scores represents more change (you’re becoming smellier), so what this actually means is that hygiene decreased significantly more in indie kids compared to those with no music affiliation. The final contrast, music3, represents the contrast comparing metallers to those with no music affiliation. The beta value represents the difference in the change in hygiene scores for a metaller, relative to someone with no music affiliation. This time the t-test is non-significant (p = .86) and the beta value is a positive value (0.03). We could conclude that the change in hygiene scores is similar if a person changes from having no music affiliation to being a metaller: the change in hygiene scores is not predicted by whether someone is a metaller compared to if they have no music affiliation. So, overall this analysis has shown that compared to having no music affiliation, crusties and indie kids get significantly smellier across the three days of the music festival, but metallers don’t.***