In [16]:
from sklearn.model_selection import KFold
from sklearn.linear_model import LassoCV
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import Lasso, LinearRegression
import pandas as pd
from statsmodels.tools import add_constant
from linearmodels import PanelOLS, RandomEffects


def read_data(group_name):
    file_path = f"C:/Users/koala/Documents/МГУ/6 семестр/Untitled Folder/new/{group_name}/{group_name}_group_all_seasons_dummy_updated.csv"
    return pd.read_csv(file_path, index_col=['id'])



def build_fixed_effects_regression(df, target, features):
    X = add_constant(df[features])
    y = df[target]
    fixed_effects_mod = PanelOLS(y, X, entity_effects=True)
    fixed_effects_res = fixed_effects_mod.fit()
    return fixed_effects_res

def build_random_effects_regression(df, target, features):
    X = add_constant(df[features])
    y = df[target]
    random_effects_mod = RandomEffects(y, X)
    random_effects_res = random_effects_mod.fit()
    return random_effects_res

def build_pooled_ols_regression(df, target, features):
    X = add_constant(df[features])
    y = df[target]
    pooled_ols_mod = PooledOLS(y, X)
    pooled_ols_res = pooled_ols_mod.fit()
    return pooled_ols_res

def cross_val_error(model, X, y, error_func):
    scores = cross_val_score(model, X, y, scoring='neg_mean_absolute_error', cv=5)
    return -1 * scores.mean()

def split_data(df, train_seasons):
    train_df = df[df['Season'].isin(train_seasons)]
    test_df = df[~df['Season'].isin(train_seasons)]
    return train_df, test_df

models = {
    'Pooled OLS': build_pooled_ols_regression,
    'Random Effects': build_random_effects_regression,
    'Fixed Effects': build_fixed_effects_regression
}


group_names = ['red', 'yellow', 'green']
regressors = {
    'red': ['Age',	'Age^2',	'Gls',	'SoT%',	'Cmp%',	'Ast',	'Tkl%',	'Clr',	'Att.5',	'Succ%',	'PPM',	'CrdY',	'Won%'],
    'yellow': ['Age',	'xG',	'Blocks',	'Tkl+Int',	'Err',	'Def Pen',	'Att.5',	'Succ%',	'Mn/Sub',	'xG+/-90',	'Won%'],
    'green': ['Age',	'Sh/90',	'np:G-xG',	'Dist',	'Cmp%',	'A-xAG',	'Crs',	'TO',	'Tkl%',	'Tkl+Int',	'Def Pen',	'Succ%',	'44986',	'onGA',	'xG+/-90',	'Won%']
}

train_seasons = ['2017', '2018', '2019', '2020']

for group_name in group_names:
    df = read_data(group_name)

    train_df, test_df = split_data(df, train_seasons)

    selected_features = regressors[group_name]

    min_error = float('inf')
    best_model_name = None
    best_model = None

    for model_name, build_model_func in models.items():
        model = build_model_func(train_df, 'RATING', selected_features)
        error = cross_val_error(model, train_df[selected_features], train_df['RATING'], mean_absolute_error)

        print(f"Cross-validation error (MAE) for {group_name} group using {model_name}: {error:.4f}")

        if error < min_error:
            min_error = error
            best_model_name = model_name
            best_model = model

    print(f"Best model for {group_name} group is {best_model_name} with MAE: {min_error:.4f}")

    # Оценка точности лучшей модели на тестовой выборке
    test_error = mean_absolute_error(test_df['RATING'], best_model.predict(test_df[selected_features]))
    print(f"Test error (MAE) for {group_name} group using {best_model_name}: {test_error:.4f}\n")

NameError: name 'PooledOLS' is not defined