In [1]:
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error

In [2]:
data = pd.read_csv('vgsales_17.csv')
data.dropna(inplace=True)
data

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,8861,NHL 09,PS2,2008.0,Sports,Electronic Arts,0.07,0.06,0.00,0.02,0.15
1,16363,Shirahana no Ori: Hiiro no Kakera 4 - Shiki no...,PSP,2013.0,Adventure,Idea Factory,0.00,0.00,0.01,0.00,0.01
2,3476,Gex: Enter the Gecko,PS,1998.0,Platform,BMG Interactive Entertainment,0.32,0.22,0.00,0.04,0.58
3,7432,Deus Ex: The Conspiracy,PS2,2002.0,Action,Eidos Interactive,0.10,0.08,0.00,0.03,0.21
4,501,Call of Duty: Finest Hour,PS2,2004.0,Shooter,Activision,1.51,1.12,0.01,0.24,2.89
...,...,...,...,...,...,...,...,...,...,...,...
548,5776,Jampack Volume 12,PS2,2005.0,Misc,Sony Computer Entertainment,0.15,0.12,0.00,0.04,0.31
549,662,Bloodborne,PS4,2015.0,Action,Sony Computer Entertainment,1.00,0.76,0.26,0.36,2.38
550,7811,IHRA Professional Drag Racing 2005,PS2,2004.0,Racing,Bethesda Softworks,0.09,0.07,0.00,0.02,0.19
551,2032,Shrek 2,GC,2004.0,Platform,Activision,0.73,0.26,0.01,0.03,1.02


In [3]:
#категориальная целевая переменная
features = data.drop('Genre', axis=1) 
target = data['Genre']
label_encoder = LabelEncoder()
for column in features.columns:
    if features[column].dtype == 'object':
        features[column] = label_encoder.fit_transform(features[column])

In [4]:
kfold = KFold(n_splits=5, shuffle=True)

In [5]:
for train_index, test_index in kfold.split(features):
    X_train, X_test = features.iloc[train_index], features.iloc[test_index]
    y_train, y_test = target.iloc[train_index], target.iloc[test_index]

    print("\n")

    model_dt = DecisionTreeClassifier()
    model_dt.fit(X_train, y_train)
    y_pred_dt = model_dt.predict(X_test)
    accuracy_dt = accuracy_score(y_test, y_pred_dt)
    print(f"Decision Tree Accuracy: {accuracy_dt}")
    
    model_svm = SVC()
    model_svm.fit(X_train, y_train)
    y_pred_svm = model_svm.predict(X_test)
    accuracy_svm = accuracy_score(y_test, y_pred_svm)
    print(f"SVM Accuracy: {accuracy_svm}")

    model_knn = KNeighborsClassifier()
    model_knn.fit(X_train, y_train)
    y_pred_knn = model_knn.predict(X_test)
    accuracy_knn = accuracy_score(y_test, y_pred_knn)
    print(f"K-Nearest Neighbors Accuracy: {accuracy_knn}")

    model_rf = RandomForestClassifier()
    model_rf.fit(X_train, y_train)
    y_pred_rf = model_rf.predict(X_test)
    accuracy_rf = accuracy_score(y_test, y_pred_rf)
    print(f"Random Forest Accuracy: {accuracy_rf}")

    print("\n")
    print("Набор предсказаний дерева решений: ",y_pred_dt)
    print("Набор предсказаний опорных векторов: ",y_pred_svm)
    print("Набор предсказаний к-ближайших соседей: ",y_pred_knn)
    print("Набор предсказаний рандомного дерева: ",y_pred_rf)
    print("\n")




Decision Tree Accuracy: 0.2336448598130841
SVM Accuracy: 0.205607476635514
K-Nearest Neighbors Accuracy: 0.16822429906542055
Random Forest Accuracy: 0.2803738317757009


Набор предсказаний дерева решений:  ['Misc' 'Adventure' 'Platform' 'Action' 'Sports' 'Fighting' 'Sports'
 'Shooter' 'Misc' 'Role-Playing' 'Fighting' 'Simulation' 'Sports'
 'Role-Playing' 'Role-Playing' 'Platform' 'Action' 'Action' 'Misc'
 'Puzzle' 'Shooter' 'Sports' 'Fighting' 'Sports' 'Misc' 'Action' 'Action'
 'Role-Playing' 'Action' 'Simulation' 'Misc' 'Sports' 'Racing'
 'Role-Playing' 'Simulation' 'Fighting' 'Misc' 'Sports' 'Fighting'
 'Sports' 'Sports' 'Platform' 'Sports' 'Sports' 'Misc' 'Sports'
 'Role-Playing' 'Adventure' 'Role-Playing' 'Action' 'Shooter' 'Platform'
 'Action' 'Role-Playing' 'Fighting' 'Strategy' 'Misc' 'Simulation'
 'Adventure' 'Shooter' 'Action' 'Misc' 'Platform' 'Role-Playing'
 'Adventure' 'Platform' 'Sports' 'Shooter' 'Action' 'Sports' 'Adventure'
 'Action' 'Sports' 'Action' 'Fighting' 'Acti

In [6]:
#числовая целевая переменная
features_1 = data[['NA_Sales']] 
target_1 = data['NA_Sales']

In [7]:
label_encoder = LabelEncoder()
for column in features_1.columns:
    if features_1[column].dtype == 'object':
        features_1[column] = label_encoder.fit_transform(features_1[column])

In [8]:
for train_index, test_index in kfold.split(features_1):
    X_train, X_test = features_1.iloc[train_index], features_1.iloc[test_index]
    y_train, y_test = target_1.iloc[train_index], target_1.iloc[test_index]

    # Линейная регрессия
    model_linear = LinearRegression()
    model_linear.fit(X_train, y_train)
    y_pred_linear = model_linear.predict(X_test)
    mse_linear = mean_squared_error(y_test, y_pred_linear)
    print(f'Linear Regression MSE: {mse_linear}')

    # Полиномиальная регрессия
    polynomial_features = PolynomialFeatures(degree=2)
    X_train_poly = polynomial_features.fit_transform(X_train)
    X_test_poly = polynomial_features.transform(X_test)

    model_poly = LinearRegression()
    model_poly.fit(X_train_poly, y_train)
    y_pred_poly = model_poly.predict(X_test_poly)
    mse_poly = mean_squared_error(y_test, y_pred_poly)
    print(f'Polynomial Regression MSE: {mse_poly}')

    # Гребневая регрессия
    model_ridge = Ridge()
    model_ridge.fit(X_train, y_train)
    y_pred_ridge = model_ridge.predict(X_test)
    mse_ridge = mean_squared_error(y_test, y_pred_ridge)
    print(f'Ridge Regression MSE: {mse_ridge}')

    # Лассо-регрессия
    model_lasso = Lasso()
    model_lasso.fit(X_train, y_train)
    y_pred_lasso = model_lasso.predict(X_test)
    mse_lasso = mean_squared_error(y_test, y_pred_lasso)
    print(f'Lasso Regression MSE: {mse_lasso}')

    print("\n")
    print("Набор предсказаний линейной регрессии: ",y_pred_linear)
    print("Набор предсказаний полиномиальной регрессии: ",y_pred_poly)
    print("Набор предсказаний гребневой регрессии: ",y_pred_ridge)
    print("Набор предсказаний лассо-регрессии: ",y_pred_lasso)
    print("\n")


Linear Regression MSE: 5.3777200731807466e-31
Polynomial Regression MSE: 2.7278280003824906e-30
Ridge Regression MSE: 0.0002880042624836007
Lasso Regression MSE: 2.3106174038125604


Набор предсказаний линейной регрессии:  [-1.11022302e-16  1.51000000e+00 -1.11022302e-16  6.00000000e-02
  8.00000000e-01  9.00000000e-02  1.49700000e+01  1.90000000e-01
  6.50000000e-01  1.20000000e-01 -1.11022302e-16 -1.11022302e-16
  1.16000000e+00 -1.11022302e-16  8.00000000e-02 -1.11022302e-16
  3.50000000e-01  7.70000000e-01  1.10000000e-01  1.00000000e-01
  3.54000000e+00 -1.11022302e-16  9.00000000e-02  2.20000000e-01
 -1.11022302e-16  6.00000000e-02  1.00000000e-02  2.60000000e-01
 -1.11022302e-16 -1.11022302e-16  1.22000000e+00 -1.11022302e-16
  2.30000000e-01 -1.11022302e-16 -1.11022302e-16  6.00000000e-02
 -1.11022302e-16  6.70000000e-01 -1.11022302e-16  3.00000000e-02
  3.90000000e-01 -1.11022302e-16  1.00000000e-02  4.00000000e-02
  7.00000000e-02  7.60000000e-01  2.11000000e+00 -1.11022302e-