In [24]:
import pandas as pd
from sklearn.linear_model import LassoCV
from statsmodels.tools import add_constant

def read_data(group_name):
    file_path = f"C:/Users/koala/Documents/МГУ/6 семестр/Untitled Folder/new/{group_name}/{group_name}_group_all_seasons_dummy_updated_new.csv"
    return pd.read_csv(file_path, index_col=['id', 'Season'])

def build_lasso_regression(df, target, features):
    X = add_constant(df[features])  # Add constant to the features
    y = df[target]
    lasso = LassoCV(cv=5, random_state=42).fit(X, y)
    return lasso

group_names = ['red', 'yellow', 'green']
regressors = {
    'red': ['Age', 'Age^2', 'Gls', 'SoT%', 'Cmp%', 'Ast', 'Tkl%', 'Clr', 'Att.5', 'Succ%', 'PPM', 'CrdY', 'CrdR', 'Won%', 'def', 'mid', 'forw'],
    'yellow': ['Age', 'xG', 'Blocks', 'Tkl+Int', 'Err', 'Def Pen', 'Att.5', 'Succ%', 'Mn/Sub', 'xG+/-90', 'OG', 'Won%', 'def', 'mid', 'forw', 'Age^2'],
    'green': ['Age', 'Sh/90', 'np:G-xG', 'Dist', 'npxG/Sh', 'Cmp%', 'A-xAG', 'Crs', 'TO', 'Tkl%', 'Tkl+Int', 'Err', 'Def Pen', 'Succ%', '44986', 'onGA', 'xG+/-90', 'CrdY', 'CrdR', 'Won%', 'def', 'mid', 'forw', 'Age^2']
}

lasso_models = {}

for group_name in group_names:
    df = read_data(group_name)
    lasso_model = build_lasso_regression(df, 'RATING', regressors[group_name])
    lasso_models[group_name] = lasso_model

    print(f"\nLasso Regression Results for {group_name} group:")
    coef_df = pd.DataFrame(lasso_model.coef_, index=['const'] + regressors[group_name], columns=['Coefficient'])
    coef_df = coef_df[coef_df['Coefficient'] != 0]  # Filter out zero coefficients
    print(coef_df)



Lasso Regression Results for red group:
       Coefficient
Age^2     0.004797
Gls       0.272159
SoT%      0.005942
Cmp%      0.123940
Ast       0.315364
Tkl%     -0.007367
Clr       0.015809
Att.5     0.024235
Succ%     0.007319
PPM       2.645793
CrdY      0.018055
Won%      0.007446

Lasso Regression Results for yellow group:
         Coefficient
xG          0.337470
Blocks      0.009266
Tkl+Int     0.014493
Err         0.017769
Def Pen     0.009102
Att.5       0.025955
Succ%       0.015606
Mn/Sub     -0.001260
xG+/-90     2.393429
Won%        0.002479
Age^2       0.005079

Lasso Regression Results for green group:
         Coefficient
Sh/90       0.663461
np:G-xG     0.244772
Dist       -0.080479
Cmp%        0.109724
A-xAG       0.114243
Crs         0.011196
TO          0.248788
Tkl%       -0.018398
Tkl+Int     0.016514
Def Pen     0.009428
Succ%       0.001857
44986       0.019125
onGA       -0.031843
xG+/-90     1.527393
Won%        0.004809
Age^2       0.005346


In [26]:
def build_fixed_effects_regression(df, target, features):
    X = add_constant(df[features])
    y = df[target]
    fixed_effects_mod = PanelOLS(y, X, entity_effects=True, time_effects=True, drop_absorbed=True, check_rank=False)
    fixed_effects_res = fixed_effects_mod.fit()
    return fixed_effects_res

def build_random_effects_regression(df, target, features):
    X = add_constant(df[features])
    y = df[target]
    random_effects_mod = RandomEffects(y, X, check_rank=False)
    random_effects_res = random_effects_mod.fit()
    return random_effects_res

group_names = ['red', 'yellow', 'green']
regressors = {
    'red': ['Age', 'Age^2', 'Gls', 'SoT%', 'Cmp%', 'Ast', 'Tkl%', 'Clr', 'Att.5', 'Succ%', 'PPM', 'CrdY', 'CrdR', 'Won%', 'def', 'mid', 'forw'],
    'yellow': ['Age', 'xG', 'Blocks', 'Tkl+Int', 'Err', 'Def Pen', 'Att.5', 'Succ%', 'Mn/Sub', 'xG+/-90', 'OG', 'Won%', 'def', 'mid', 'forw', 'Age^2'],
    'green': ['Age', 'Sh/90', 'np:G-xG', 'Dist', 'npxG/Sh', 'Cmp%', 'A-xAG', 'Crs', 'TO', 'Tkl%', 'Tkl+Int', 'Err', 'Def Pen', 'Succ%', '44986', 'onGA', 'xG+/-90', 'CrdY', 'CrdR', 'Won%', 'def', 'mid', 'forw', 'Age^2']
}

for group_name in group_names:
    df = read_data(group_name)
    
    fixed_effects_res = build_fixed_effects_regression(df, 'RATING', regressors[group_name])
    random_effects_res = build_random_effects_regression(df, 'RATING', regressors[group_name])

    print(f"\nFixed Effects Regression Results for {group_name} group:")
    print(fixed_effects_res)

    print(f"\nRandom Effects Regression Results for {group_name} group:")
    print(random_effects_res)

Variables have been fully absorbed and have removed from the regression:

Age, forw

  fixed_effects_res = fixed_effects_mod.fit()



Fixed Effects Regression Results for red group:
                          PanelOLS Estimation Summary                           
Dep. Variable:                 RATING   R-squared:                        0.3812
Estimator:                   PanelOLS   R-squared (Between):             -9.5008
No. Observations:                2464   R-squared (Within):              -6.3872
Date:                Sun, Apr 09 2023   R-squared (Overall):             -9.1239
Time:                        19:52:55   Log-likelihood                   -4053.0
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      75.163
Entities:                         616   P-value                           0.0000
Avg Obs:                       4.0000   Distribution:                 F(15,1830)
Min Obs:                       4.0000                                           
Max Obs:                       4.0000   F-statistic (robust)


Fixed Effects Regression Results for green group:
                          PanelOLS Estimation Summary                           
Dep. Variable:                 RATING   R-squared:                        0.3381
Estimator:                   PanelOLS   R-squared (Between):             -10.068
No. Observations:                2464   R-squared (Within):              -6.7058
Date:                Sun, Apr 09 2023   R-squared (Overall):             -9.6608
Time:                        19:52:58   Log-likelihood                   -4136.0
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      42.329
Entities:                         616   P-value                           0.0000
Avg Obs:                       4.0000   Distribution:                 F(22,1823)
Min Obs:                       4.0000                                           
Max Obs:                       4.0000   F-statistic (robus

In [35]:
import numpy.linalg as la
from scipy import stats

def hausman(fe, re, changed_covariates): 

    #вычленяем коэффициенты при переменных
    b = fe.params
    B = re.params.loc[changed_covariates] 

    #находим ковариационную матрицу
    v_b = fe.cov
    v_B = re.cov[changed_covariates].loc[changed_covariates]

    #находим кол-во степеней свободы
    df = b.size

    #рассчитываем тестовую статистику
    chi2 = np.dot((b - B).T, la.inv(v_b - v_B).dot(b - B))

    #находим p-value
    pval = stats.chi2.sf(chi2, df)
    return round(chi2, 2), pval



#for group_name in group_names:
    # Считывание данных для текущей группы
    group_df = read_data(group_name)

    # Выполните регрессии с фиксированными и случайными эффектами
    fe_res = build_fixed_effects_regression(group_df, target='RATING', features=regressors[group_name])
    re_res = build_random_effects_regression(group_df, target='RATING', features=regressors[group_name])

    # Выполните тест Хаусмана
    hausman_stat, p_value = hausman_test(fe_res, re_res)

    #print(f"Тест Хаусмана для группы {group_name}:")
    #print(f"Статистика Хаусмана: {hausman_stat:.4f}")
    #print(f"p-значение: {p_value:.4f}\n")


In [40]:
group_names = ['red', 'yellow', 'green']
regressors = {
    'red': ['Age^2', 'Gls', 'SoT%', 'Cmp%', 'Ast', 'Tkl%', 'Clr', 'Att.5', 'Succ%', 'PPM', 'CrdY', 'CrdR', 'Won%', 'def', 'mid'],
    'yellow': ['Age', 'xG', 'Blocks', 'Tkl+Int', 'Err', 'Def Pen', 'Att.5', 'Succ%', 'Mn/Sub', 'xG+/-90', 'OG', 'Won%', 'def', 'mid', 'forw', 'Age^2'],
    'green': ['Age', 'Sh/90', 'np:G-xG', 'Dist', 'npxG/Sh', 'Cmp%', 'A-xAG', 'Crs', 'TO', 'Tkl%', 'Tkl+Int', 'Err', 'Def Pen', 'Succ%', '44986', 'onGA', 'xG+/-90', 'CrdY', 'CrdR', 'Won%', 'def', 'mid', 'forw', 'Age^2']
}


for group_name in group_names:
    # Считывание данных для текущей группы
    group_df = read_data(group_name)

    # Выполните регрессии с фиксированными и случайными эффектами
    fe_res = build_fixed_effects_regression(group_df, target='RATING', features=regressors[group_name])
    re_res = build_random_effects_regression(group_df, target='RATING', features=regressors[group_name])

    # Выполните тест Хаусмана
    print(hausman(fe_res, re_res, regressors[group_name]))

(nan, nan)


Variables have been fully absorbed and have removed from the regression:

Age, forw

  fixed_effects_res = fixed_effects_mod.fit()


(nan, nan)
(nan, nan)


In [31]:
import pandas as pd
from linearmodels import PanelOLS, RandomEffects
from statsmodels.tools.tools import add_constant

# Предположим, что df содержит ваши данные
# df = ...

for group_name in group_names:
    # Ограничьте данные для текущей группы
    group_df = df[df['group'] == group_name]

    # Выполните регрессии с фиксированными и случайными эффектами
    fe_res = build_fixed_effects_regression(group_df, target='target_var', features=regressors[group_name])
    re_res = build_random_effects_regression(group_df, target='target_var', features=regressors[group_name])

    # Выполните тест Хаусмана
    hausman_stat, p_value = hausman_test(fe_res, re_res)

    print(f"Тест Хаусмана для группы {group_name}:")
    print(f"Статистика Хаусмана: {hausman_stat:.4f}")
    print(f"p-значение: {p_value:.4f}\n")


TypeError: 'int' object is not subscriptable

In [16]:
!pip install statsmodels

