In [1]:
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.preprocessing import PolynomialFeatures
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error

In [2]:
data = pd.read_csv('vgsales_6.csv')
data.dropna(inplace=True)
data

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,13353,Black & Bruised,GC,2003.0,Fighting,Vivendi Games,0.04,0.01,0.00,0.00,0.05
1,11530,Tempest 3X,PS,1995.0,Shooter,Interplay,0.04,0.03,0.00,0.01,0.08
2,4217,We Sing Encore,Wii,2010.0,Misc,Nordic Games,0.00,0.41,0.00,0.06,0.47
3,3796,Spider-Man,N64,2000.0,Action,Activision,0.43,0.09,0.00,0.01,0.53
4,58,Super Mario All-Stars,SNES,1993.0,Platform,Nintendo,5.99,2.15,2.12,0.29,10.55
...,...,...,...,...,...,...,...,...,...,...,...
548,3345,NBA 2K8,PS2,2007.0,Sports,Take-Two Interactive,0.50,0.02,0.00,0.08,0.60
549,4948,Marvel: Ultimate Alliance,PS3,2006.0,Role-Playing,Activision,0.33,0.02,0.00,0.04,0.39
550,10150,The Fairly Odd Parents: Breakin Da Rules,XB,2003.0,Platform,THQ,0.08,0.02,0.00,0.00,0.11
551,16584,Fit & Fun,Wii,2011.0,Sports,Unknown,0.00,0.01,0.00,0.00,0.01


In [3]:
features = data.drop('Genre', axis=1) 
target = data['Genre']
label_encoder = LabelEncoder()
for column in features.columns:
    if features[column].dtype == 'object':
        features[column] = label_encoder.fit_transform(features[column])

In [4]:
kfold = KFold(n_splits=5, shuffle=True)

In [5]:
for train_index, test_index in kfold.split(features):
    X_train, X_test = features.iloc[train_index], features.iloc[test_index]
    y_train, y_test = target.iloc[train_index], target.iloc[test_index]

    print("\n")

    model_dt = DecisionTreeClassifier()
    model_dt.fit(X_train, y_train)
    y_pred_dt = model_dt.predict(X_test)
    accuracy_dt = accuracy_score(y_test, y_pred_dt)
    print(f"Decision Tree Accuracy: {accuracy_dt}")
    
    model_svm = SVC()
    model_svm.fit(X_train, y_train)
    y_pred_svm = model_svm.predict(X_test)
    accuracy_svm = accuracy_score(y_test, y_pred_svm)
    print(f"SVM Accuracy: {accuracy_svm}")

    model_knn = KNeighborsClassifier()
    model_knn.fit(X_train, y_train)
    y_pred_knn = model_knn.predict(X_test)
    accuracy_knn = accuracy_score(y_test, y_pred_knn)
    print(f"K-Nearest Neighbors Accuracy: {accuracy_knn}")

    model_rf = RandomForestClassifier()
    model_rf.fit(X_train, y_train)
    y_pred_rf = model_rf.predict(X_test)
    accuracy_rf = accuracy_score(y_test, y_pred_rf)
    print(f"Random Forest Accuracy: {accuracy_rf}")

    print("\n")
    print("Набор предсказаний дерева решений: ",y_pred_dt)
    print("Набор предсказаний опорных векторов: ",y_pred_svm)
    print("Набор предсказаний к-ближайших соседей: ",y_pred_knn)
    print("Набор предсказаний рандомного дерева: ",y_pred_rf)
    print("\n")




Decision Tree Accuracy: 0.18518518518518517
SVM Accuracy: 0.19444444444444445
K-Nearest Neighbors Accuracy: 0.18518518518518517
Random Forest Accuracy: 0.2222222222222222


Набор предсказаний дерева решений:  ['Simulation' 'Racing' 'Misc' 'Simulation' 'Racing' 'Platform' 'Shooter'
 'Action' 'Action' 'Adventure' 'Sports' 'Simulation' 'Misc' 'Action'
 'Role-Playing' 'Role-Playing' 'Misc' 'Fighting' 'Strategy' 'Fighting'
 'Sports' 'Adventure' 'Racing' 'Racing' 'Shooter' 'Racing' 'Sports'
 'Sports' 'Racing' 'Platform' 'Adventure' 'Action' 'Misc' 'Misc' 'Shooter'
 'Puzzle' 'Sports' 'Adventure' 'Action' 'Action' 'Puzzle' 'Sports'
 'Shooter' 'Racing' 'Action' 'Fighting' 'Racing' 'Platform' 'Misc'
 'Action' 'Role-Playing' 'Shooter' 'Action' 'Shooter' 'Sports' 'Racing'
 'Racing' 'Misc' 'Role-Playing' 'Shooter' 'Action' 'Shooter' 'Sports'
 'Role-Playing' 'Racing' 'Misc' 'Simulation' 'Misc' 'Action' 'Shooter'
 'Sports' 'Misc' 'Sports' 'Sports' 'Simulation' 'Adventure' 'Puzzle'
 'Misc' 'Racing' 

In [6]:
#числовая целевая переменная
features_1 = data[['NA_Sales']] 
target_1 = data['NA_Sales']

In [7]:
label_encoder = LabelEncoder()
for column in features_1.columns:
    if features_1[column].dtype == 'object':
        features_1[column] = label_encoder.fit_transform(features_1[column])

In [8]:
for train_index, test_index in kfold.split(features_1):
    X_train, X_test = features_1.iloc[train_index], features_1.iloc[test_index]
    y_train, y_test = target_1.iloc[train_index], target_1.iloc[test_index]

    # Линейная регрессия
    model_linear = LinearRegression()
    model_linear.fit(X_train, y_train)
    y_pred_linear = model_linear.predict(X_test)
    mse_linear = mean_squared_error(y_test, y_pred_linear)
    print(f'Linear Regression MSE: {mse_linear}')

    # Полиномиальная регрессия
    polynomial_features = PolynomialFeatures(degree=2)
    X_train_poly = polynomial_features.fit_transform(X_train)
    X_test_poly = polynomial_features.transform(X_test)

    model_poly = LinearRegression()
    model_poly.fit(X_train_poly, y_train)
    y_pred_poly = model_poly.predict(X_test_poly)
    mse_poly = mean_squared_error(y_test, y_pred_poly)
    print(f'Polynomial Regression MSE: {mse_poly}')

    # Гребневая регрессия
    model_ridge = Ridge()
    model_ridge.fit(X_train, y_train)
    y_pred_ridge = model_ridge.predict(X_test)
    mse_ridge = mean_squared_error(y_test, y_pred_ridge)
    print(f'Ridge Regression MSE: {mse_ridge}')

    # Лассо-регрессия
    model_lasso = Lasso()
    model_lasso.fit(X_train, y_train)
    y_pred_lasso = model_lasso.predict(X_test)
    mse_lasso = mean_squared_error(y_test, y_pred_lasso)
    print(f'Lasso Regression MSE: {mse_lasso}')

    print("\n")
    print("Набор предсказаний линейной регрессии: ",y_pred_linear)
    print("Набор предсказаний полиномиальной регрессии: ",y_pred_poly)
    print("Набор предсказаний гребневой регрессии: ",y_pred_ridge)
    print("Набор предсказаний лассо-регрессии: ",y_pred_lasso)
    print("\n")


Linear Regression MSE: 2.472922190798706e-31
Polynomial Regression MSE: 2.099857133418769e-32
Ridge Regression MSE: 1.9763047858682048e-05
Lasso Regression MSE: 0.647621091070816


Набор предсказаний линейной регрессии:  [1.38777878e-16 5.99000000e+00 5.00000000e-02 1.38777878e-16
 1.02000000e+00 1.38777878e-16 8.60000000e-01 1.38777878e-16
 1.38777878e-16 3.00000000e-02 4.90000000e-01 2.10000000e-01
 1.50000000e-01 3.00000000e-02 1.38777878e-16 4.00000000e-02
 1.38777878e-16 5.80000000e-01 1.38777878e-16 1.38777878e-16
 1.38777878e-16 1.38777878e-16 1.90000000e-01 5.00000000e-02
 5.00000000e-02 9.00000000e-02 1.38777878e-16 4.21000000e+00
 1.20000000e-01 4.10000000e-01 1.38777878e-16 5.00000000e-02
 1.38777878e-16 6.60000000e-01 9.00000000e-02 2.00000000e-02
 1.38777878e-16 1.38777878e-16 1.00000000e-02 3.00000000e-02
 1.38777878e-16 7.00000000e-02 4.00000000e-02 2.00000000e-02
 1.38777878e-16 2.29000000e+00 1.50000000e-01 3.20000000e-01
 1.40000000e-01 8.00000000e-02 1.38777878e-16 1