In [10]:
import cx_Oracle
import pandas as pd


db_username_read = open('db_username.txt')
db_username = db_username_read.read()
db_username_read.close()

db_password_read = open('db_password.txt')
db_password = db_password_read.read()
db_password_read.close()

db_host_read = open('db_host.txt')
db_host = db_host_read.read()
db_host_read.close()

connection = cx_Oracle.connect(user=db_username, password=db_password, dsn=db_host)
cursor = connection.cursor()

# Wyciągnięcie danych z bazy
query = """
SELECT gs.game_name, p.platform_name, pub.publisher_name, d.developer_name, g.genre_name, r.rating_name, gs.na_sales, gs.eu_sales, gs.jp_sales, gs.other_sales, gs.critic_score, gs.critic_count, gs.user_score, gs.user_count, gs.release_date
FROM game_sales gs
JOIN platforms p ON gs.platform_id = p.platform_id
JOIN publishers pub ON gs.publisher_id = pub.publisher_id
JOIN developers d ON gs.developer_id = d.developer_id
JOIN genres g ON gs.genre_id = g.genre_id
JOIN ratings r ON gs.rating_id = r.rating_id
"""
cursor.execute(query)

columns = ['Game_Name', 'Platform', 'Publisher', 'Developer', 'Genre', 'Rating', 'NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales', 'Critic_Score', 'Critic_Count', 'User_Score', 'User_Count', 'Release_Date']
data = cursor.fetchall()
df = pd.DataFrame(data, columns=columns)

df.head()

Unnamed: 0,Game_Name,Platform,Publisher,Developer,Genre,Rating,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Critic_Score,Critic_Count,User_Score,User_Count,Release_Date
0,New Super Luigi U,WiiU,Nintendo,Nintendo,Platform,E,1250000,620000,180000,180000,77,59,7.9,288,2013
1,Tetris DS,DS,Nintendo,Nintendo,Puzzle,E,630000,50000,1350000,80000,84,56,8.7,44,2006
2,Classic NES Series: Super Mario Bros.,GBA,Nintendo,Nintendo,Platform,E,0,0,1390000,30000,84,14,8.6,44,2004
3,Pikmin 2,GC,Nintendo,Nintendo,Strategy,E,480000,130000,560000,30000,90,54,9.1,137,2004
4,Mario vs. Donkey Kong,GBA,Nintendo,Nintendo,Puzzle,E,680000,250000,210000,20000,81,43,8.0,31,2004


In [11]:
mean = df.mean()
median = df.median()
mode = df.mode()
std_dev = df.std()

print('Średnia:')
print(mean)
print('\nMediana:')
print(median)
print('\nDominanta:')
print(mode.iloc[0])
print('\nOdchylenie Standardowe:\n', std_dev)

Średnia:
NA_Sales        394483.513846
EU_Sales        236089.375971
JP_Sales         64158.241758
Other_Sales      82676.923077
Critic_Score        70.272088
Critic_Count        28.931136
User_Score           7.185626
User_Count         174.722344
Release_Date      2007.436777
dtype: float64

Mediana:
NA_Sales        150000.0
EU_Sales         60000.0
JP_Sales             0.0
Other_Sales      20000.0
Critic_Score        72.0
Critic_Count        25.0
User_Score           7.5
User_Count          27.0
Release_Date      2007.0
dtype: float64

Dominanta:
Game_Name       LEGO Star Wars II: The Original Trilogy
Platform                                            PS2
Publisher                               Electronic Arts
Developer                                     EA Canada
Genre                                            Action
Rating                                                T
NA_Sales                                            0.0
EU_Sales                                            

  mean = df.mean()
  median = df.median()
  std_dev = df.std()


In [12]:
stats_dict = {'Średnia': mean, 'Mediana': median, 'Dominanta': mode.iloc[0], 'Odchylenie Standardowe':  std_dev}
for stat_name, stat in stats_dict.items():
    data_to_insert = {
            'stat_name': stat_name,
            'stat_game_name': None,
            'stat_platform': None,
            'stat_publisher': None,
            'stat_developer': None,
            'stat_genre': None,
            'stat_rating': None,
            'stat_na_sales': None,
            'stat_eu_sales': None,
            'stat_jp_sales': None,
            'stat_other_sales': None,
            'stat_critic_score': None,
            'stat_critic_count': None,
            'stat_user_score': None,
            'stat_user_count': None,
            'stat_release_date': None,
        }
    for column_name, val in stat.items():
        data_to_insert['stat_' + column_name] = val
    query = """
        INSERT INTO game_sales_stats (stat_id, STATISTIC, game_name, platform, publisher, developer, genre, rating, na_sales, eu_sales, jp_sales, other_sales, critic_score, critic_count, user_score, user_count, release_date)
        VALUES (STATS_SEQ.NEXTVAL, :stat_name, :stat_game_name, :stat_platform, :stat_publisher, :stat_developer, :stat_genre, :stat_rating, :stat_na_sales, :stat_eu_sales, :stat_jp_sales, :stat_other_sales, :stat_critic_score, :stat_critic_count, :stat_user_score, :stat_user_count, :stat_release_date)
        """
    cursor.execute(query, data_to_insert)
connection.commit()
cursor.close()
connection.close()

In [15]:
df_2007 = df[df['Release_Date'] == 2007]

mean = df_2007.mean()
median = df_2007.median()
mode = df_2007.mode()
std_dev = df_2007.std()

print('Średnia:')
print(mean)
print('\nMediana:')
print(median)
print('\nDominanta:')
print(mode.iloc[0])
print('\nOdchylenie Standardowe:\n', std_dev)

Średnia:
NA_Sales        399338.981356
EU_Sales        211372.879661
JP_Sales         59389.830508
Other_Sales     102745.762712
Critic_Score        67.418644
Critic_Count        26.388136
User_Score           6.926610
User_Count          97.555932
Release_Date      2007.000000
dtype: float64

Mediana:
NA_Sales        180000.0
EU_Sales         20000.0
JP_Sales             0.0
Other_Sales      20000.0
Critic_Score        70.0
Critic_Count        24.0
User_Score           7.2
User_Count          22.0
Release_Date      2007.0
dtype: float64

Dominanta:
Game_Name       Harry Potter and the Order of the Phoenix
Platform                                              Wii
Publisher                                 Electronic Arts
Developer                                       EA Canada
Genre                                              Action
Rating                                                  E
NA_Sales                                              0.0
EU_Sales                              

  mean = df_2007.mean()
  median = df_2007.median()
  std_dev = df_2007.std()


In [16]:
df_PS3 = df[df['Platform'] == 'PS3']

mean = df_PS3.mean()
median = df_PS3.median()
mode = df_PS3.mode()
std_dev = df_PS3.std()

print('Średnia:')
print(mean)
print('\nMediana:')
print(median)
print('\nDominanta:')
print(mode.iloc[0])
print('\nOdchylenie Standardowe:\n', std_dev)

Średnia:
NA_Sales        444239.267880
EU_Sales        355656.695709
JP_Sales         70897.269181
Other_Sales     149297.789337
Critic_Score        70.953186
Critic_Count        34.921977
User_Score           6.782705
User_Count         227.490247
Release_Date      2010.091027
dtype: float64

Mediana:
NA_Sales        220000.0
EU_Sales        150000.0
JP_Sales         10000.0
Other_Sales      70000.0
Critic_Score        74.0
Critic_Count        33.0
User_Score           7.1
User_Count          52.0
Release_Date      2010.0
dtype: float64

Dominanta:
Game_Name         Madden NFL 13
Platform                    PS3
Publisher       Electronic Arts
Developer             EA Canada
Genre                    Action
Rating                        T
NA_Sales                  70000
EU_Sales                      0
JP_Sales                      0
Other_Sales               10000
Critic_Score                 74
Critic_Count                 37
User_Score                  7.5
User_Count                  

  mean = df_PS3.mean()
  median = df_PS3.median()
  std_dev = df_PS3.std()
