In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import missingno as mn
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Show every column when a DataFrame is displayed instead of eliding
# the middle ones.
pd.options.display.max_columns = None

# Read the sales dataset from the project's data directory.
df = pd.read_csv('data/vgsales.csv')

In [None]:
# Dimensions of the freshly loaded dataset as (rows, columns).
df.shape

In [None]:
# Column names, dtypes and non-null counts.
df.info()

In [None]:
# First rows of the dataset, as a quick look at the raw values.
df.head()

In [None]:
# Summary statistics (count, mean, spread, quartiles) of the dataset.
df.describe()

In [None]:
# Visualise where values are missing, using missingno's matrix plot.
mn.matrix(df)

In [None]:
# Dimensions again; df has not been modified since it was loaded, so this
# matches the earlier shape check.
df.shape

In [None]:
# Per column: True if the column contains at least one missing value.
df.isnull().any()

In [None]:
# Discard every row that has at least one missing value, then confirm
# that no column reports nulls any more.
df = df.dropna(axis=0, how='any')
df.isnull().any()

In [None]:
# The 20 rows with the highest Global_Sales, drawn as a bar chart
# keyed by game name.
df_game = df.sort_values(by='Global_Sales', ascending=False).head(20)
plot = df_game.plot(
    x='Name',
    y='Global_Sales',
    kind='bar',
    title="Yıllara Göre En Çok Satan Oyunlar",
    xlabel="Oyun İsmi",
    ylabel="Küresel Satış",
)
plot

In [None]:
# Yearly sales totals: the global figure and each regional figure.
# numeric_only=True restricts the sum to numeric columns. Without it,
# pandas >= 2.0 also "sums" the non-numeric columns (string concatenation),
# and pandas 1.5 emits a FutureWarning while dropping them.
year_df = df.groupby('Year').sum(numeric_only=True).reset_index()

# Line chart comparing the global total with the regional totals.
year_df.plot(
    x='Year',
    y=['Global_Sales', 'NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales'],
    title="Yıllara Göre Satış Miktarları",
    xlabel="Yıl",
    ylabel="Satış",
)

# Bar chart of the global total only.
year_df.plot(
    x='Year',
    y='Global_Sales',
    kind='bar',
    title="Yıllara Göre Küresel Satış Miktarları",
    xlabel="Yıl",
    ylabel="Küresel Satış",
)

In [None]:
# Number of records per year. After count(), every column holds the number
# of non-null entries in that year's group, so the Global_Sales column is
# used here purely as a row counter.
year_df = df.groupby('Year').count().reset_index()

year_df.plot(
    x='Year', y='Global_Sales', kind='bar',
    title="Yıllara Göre Oyun Verileri",
    xlabel="Yıl", ylabel="Oyun Verisi",
)


In [None]:
# Number of records per publisher, keeping the 20 publishers with the most.
# The count is taken on the Rank column, so the result column is named Rank.
publisher_game_count = (
    df.groupby('Publisher')['Rank']
    .count()
    .reset_index()
    .sort_values(by='Rank', ascending=False)
    .head(20)
)
publisher_game_count.plot(
    x='Publisher', y='Rank', kind='bar',
    title="Yayımcılara Göre Oyun Sayıları",
    xlabel="Yayımcı", ylabel="Oyun Sayısı",
)

In [None]:
# Total Global_Sales per publisher, keeping the 20 largest.
# numeric_only=True restricts the sum to numeric columns. Without it,
# pandas >= 2.0 also "sums" the non-numeric columns (string concatenation),
# and pandas 1.5 emits a FutureWarning while dropping them.
publisher_global_sales = (
    df.groupby('Publisher')
    .sum(numeric_only=True)
    .reset_index()
    .sort_values('Global_Sales', ascending=False)
    .head(20)
)
publisher_global_sales.plot(
    x='Publisher',
    y='Global_Sales',
    kind='bar',
    title="Yayımcılara Göre Küresel Oyun Satışları",
    xlabel="Yayımcı",
    ylabel="Küresel Satış Miktarı",
)


In [None]:
# Number of records per platform, keeping the 20 platforms with the most.
# The count is taken on the Rank column, so the result column is named Rank.
platform_game_count = (
    df.groupby('Platform')['Rank']
    .count()
    .reset_index()
    .sort_values(by='Rank', ascending=False)
    .head(20)
)
platform_game_count.plot(
    x='Platform', y='Rank', kind='bar',
    title="Platformlara Göre Çıkan Oyun Sayıları",
    xlabel="Platform", ylabel="Oyun Sayısı",
)

In [None]:
# Total Global_Sales per platform, keeping the 20 largest.
# The chart is ranked by and plots Global_Sales: its title and y-axis label
# describe global sales, whereas the summed Rank column is not a sales figure.
# numeric_only=True keeps the sum to numeric columns (pandas >= 2.0 would
# otherwise concatenate the string columns as well).
platform_global_sales = (
    df.groupby('Platform')
    .sum(numeric_only=True)
    .reset_index()
    .sort_values('Global_Sales', ascending=False)
    .head(20)
)
platform_global_sales.plot(
    x='Platform',
    y='Global_Sales',
    kind='bar',
    title="Platformlara Göre Küresel Oyun Satışları",
    xlabel="Platform",
    ylabel="Küresel Satış Miktarı",
)


In [None]:
# Pairwise correlation between the regional and global sales columns.
df_corr = df.loc[
    :, ['NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales']
]
df_corr.corr()

In [None]:
# Simple linear regression: predict Global_Sales from JP_Sales.
x = df[['JP_Sales']]
y = df['Global_Sales']

# A fixed random_state makes the 80/20 split, and therefore the reported
# R² score, identical on every run of the notebook.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

linear_regressor = LinearRegression()
linear_regressor.fit(x_train, y_train)
y_pred = linear_regressor.predict(x_test)

# Held-out points with the fitted line drawn over them.
plt.scatter(x_test, y_test)
plt.plot(x_test, y_pred, color='red')
plt.xlabel('JP_Sales')
plt.ylabel('Global_Sales')

# R² of the linear fit on the held-out split.
r2_poly = r2_score(y_test, y_pred)
r2_poly

In [None]:
# Simple linear regression: predict Global_Sales from EU_Sales.
x = df[['EU_Sales']]
y = df['Global_Sales']

# A fixed random_state makes the 80/20 split, and therefore the reported
# R² score, identical on every run of the notebook.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

linear_regressor = LinearRegression()
linear_regressor.fit(x_train, y_train)
y_pred = linear_regressor.predict(x_test)

# Held-out points (small markers) with the fitted line drawn over them.
plt.scatter(x_test, y_test, s=10)
plt.plot(x_test, y_pred, color='red')
plt.xlabel('EU_Sales')
plt.ylabel('Global_Sales')

# R² of the linear fit on the held-out split.
r2_poly = r2_score(y_test, y_pred)
r2_poly

In [None]:
# Mean of each numeric column per year.
# numeric_only=True is required on pandas >= 2.0, where mean() raises a
# TypeError if the grouped frame still contains non-numeric columns.
year_df = df.groupby('Year').mean(numeric_only=True).reset_index()

# Linear trend of the yearly mean Global_Sales over Year.
x = year_df[['Year']]
y = year_df['Global_Sales']

# Fitted and scored on the same yearly points (no train/test split here).
linear_regressor = LinearRegression()
linear_regressor.fit(x, y)
y_pred = linear_regressor.predict(x)

plt.scatter(x, y, s=10)
plt.plot(x, y_pred, color='red')
plt.xlabel('Year')
plt.ylabel('Global_Sales')

# In-sample R² of the linear trend.
r2_poly = r2_score(y, y_pred)
r2_poly

In [None]:
# Extrapolate the linear trend to 2021. The model was fitted on a DataFrame
# with a 'Year' column, so the input is passed in the same form; a bare list
# makes scikit-learn warn about missing feature names.
linear_regressor.predict(pd.DataFrame({'Year': [2021]}))

In [None]:
# Extrapolate the linear trend to 2022. The model was fitted on a DataFrame
# with a 'Year' column, so the input is passed in the same form; a bare list
# makes scikit-learn warn about missing feature names.
linear_regressor.predict(pd.DataFrame({'Year': [2022]}))

In [None]:
# Extrapolate the linear trend to 2023. The model was fitted on a DataFrame
# with a 'Year' column, so the input is passed in the same form; a bare list
# makes scikit-learn warn about missing feature names.
linear_regressor.predict(pd.DataFrame({'Year': [2023]}))

In [None]:
# Mean of each numeric column per year. This cell rebuilds x and y, which
# the polynomial-regression cell that follows reuses.
# numeric_only=True is required on pandas >= 2.0, where mean() raises a
# TypeError if the grouped frame still contains non-numeric columns.
year_df = df.groupby('Year').mean(numeric_only=True).reset_index()

x = year_df[['Year']]
y = year_df['Global_Sales']

# Linear baseline, fitted and scored on the same yearly points.
linear_regressor = LinearRegression()
linear_regressor.fit(x, y)
y_pred = linear_regressor.predict(x)

plt.scatter(x, y, s=10)
plt.plot(x, y_pred, color='red')
plt.xlabel('Year')
plt.ylabel('Global_Sales')

# In-sample R² of the linear baseline.
r2_poly = r2_score(y, y_pred)
r2_poly

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Cubic polynomial regression on the x / y prepared in the previous cell.
# NOTE(review): x presumably holds four-digit calendar years (later cells
# predict for 2021-2023). Raw cubes of such values are huge and almost
# perfectly collinear, which makes the least-squares fit ill-conditioned.
# Standardising the input first spans the same family of cubic polynomials
# but keeps the problem numerically stable.
Input = [
    ('scale', StandardScaler()),
    ('polynomial', PolynomialFeatures(degree=3)),
    ('modal', LinearRegression()),
]
pipe = Pipeline(Input)
pipe.fit(x, y)
poly_pred = pipe.predict(x)

plt.figure(figsize=(10, 5))
plt.scatter(x, y)
plt.plot(x, poly_pred, color='g', label='Polynomial Regression')
plt.xlabel('Predictor', fontsize=16)
plt.ylabel('Target', fontsize=16)
# The label passed to plot() is only displayed once a legend is drawn.
plt.legend()

# In-sample R² of the cubic fit.
r2_poly = r2_score(y, poly_pred)
r2_poly

In [None]:
# Extrapolate the polynomial pipeline to 2021. The pipeline was fitted on a
# DataFrame with a 'Year' column, so the input is passed in the same form; a
# bare list makes scikit-learn warn about missing feature names.
pipe.predict(pd.DataFrame({'Year': [2021]}))

In [None]:
# Extrapolate the polynomial pipeline to 2022. The pipeline was fitted on a
# DataFrame with a 'Year' column, so the input is passed in the same form; a
# bare list makes scikit-learn warn about missing feature names.
pipe.predict(pd.DataFrame({'Year': [2022]}))

In [None]:
# Extrapolate the polynomial pipeline to 2023. The pipeline was fitted on a
# DataFrame with a 'Year' column, so the input is passed in the same form; a
# bare list makes scikit-learn warn about missing feature names.
pipe.predict(pd.DataFrame({'Year': [2023]}))