-
Notifications
You must be signed in to change notification settings - Fork 0
/
NB.py
80 lines (48 loc) · 2.06 KB
/
NB.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import pandas as pd
import numpy as np
from matplotlib.pylab import plt
from sklearn import model_selection
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix
# Load the raw sales table (relative path, resolved against the current
# working directory).
data = pd.read_csv("Video Games Sales.csv")

# Preprocessing: drop the "index" column, then discard every row that still
# contains a missing value.
df = data.drop("index", axis=1, inplace=False)
df = df.dropna(axis=0)

# Convert the nominal (string) columns to integer codes.
# Each column is encoded exactly once: running LabelEncoder again on a column
# that already holds codes 0..k-1 maps every code to itself, so a second pass
# over "Platform" would be a no-op.
le = LabelEncoder()
for column in ("Game Title", "Platform", "Genre", "Publisher"):
    df[column] = le.fit_transform(df[column])
# Convert the numeric "Rank" column into five categorical sales tiers:
# [1, 400], (400, 800], (800, 1200], (1200, 1600], (1600, 2000].
rank = df["Rank"]
tier_edges = [400, 800, 1200, 1600, 2000]
conditions = [(rank >= 1) & (rank <= tier_edges[0])]
conditions += [
    (rank > lower) & (rank <= upper)
    for lower, upper in zip(tier_edges, tier_edges[1:])
]
labels = ["very high sale", "high sale", "medium sale", "low sale", "very low sale"]

# np.select's implicit default is the integer 0, which recent NumPy releases
# refuse to combine with string choices (TypeError: no common dtype).
# Passing the default explicitly as the string "0" keeps the value older
# NumPy produced for ranks outside 1..2000 and works on every version.
df["Rank"] = np.select(conditions, labels, default="0")

# Encode the tier names as integer class ids.
# NOTE: LabelEncoder assigns ids in sorted (alphabetical) label order, so the
# ids do not follow the tier order ("high sale" < "low sale" < "medium sale"
# < "very high sale" < "very low sale").
le.fit(df["Rank"])
df["Rank"] = le.transform(df["Rank"])
# Select the feature columns and the target (the encoded sales-tier class).
x = df[["Publisher", "Global", "North America", "Europe", "Japan", "Rest of World", "Platform", "Year"]]
y = df["Rank"]

# Split BEFORE imputing and scaling so that no statistic computed from the
# held-out rows leaks into preprocessing. The split depends only on the
# number of rows and random_state, so the partition is the same as when
# splitting the already-scaled array.
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    x, y, test_size=0.4, random_state=7
)

# Fill missing values with the column means of the training rows only.
train_means = x_train.mean()
x_train = x_train.fillna(train_means)
x_test = x_test.fillna(train_means)

# Standardise: fit the scaler on the training rows, then apply the same
# transformation to the test rows.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Train Gaussian Naive Bayes and predict the held-out rows.
model = GaussianNB()
model.fit(x_train, y_train)
predictions = model.predict(x_test)

print("confusion matrix:", confusion_matrix(y_test, predictions))
# NOTE(review): y holds LabelEncoder class ids, which are assigned in
# alphabetical label order rather than tier order, so the two error values
# below are not distances between sales tiers. Accuracy and the confusion
# matrix are the meaningful metrics here.
print('mean squared error : ', mean_squared_error(y_test, predictions))
print('mean absolute error : ', mean_absolute_error(y_test, predictions))
print('accuracy : ', model.score(x_test, y_test))