In [1]:
#pip install scikit-learn
#pip install pandas

# Main imports pandas and sklearn
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import svm
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

# We may need to do some web scraping to get the review text
import urllib.request
from bs4 import BeautifulSoup

#Other imports
import time
import sys
from os import system
from IPython.display import clear_output

# Load in data
### There are a few steps to getting clean data
- Read data from kaggle into dataframes
- Get rid of unecessary features of the data
- (For game spot data) get the review text from the web
- Clean data using dropna

## Read in data from csv

In [2]:
# download dataset at: https://www.kaggle.com/datasets/joyshil0599/multi-decade-video-game-review-dataset
GAME_SPOT_DATASET_PATH = "data/game_spot_data.csv"
# downoad dataset at: https://www.kaggle.com/datasets/andrewmvd/steam-reviews
STEAM_DATASET_PATH = 'data/steam_data.csv'
# download dataset at: https://www.kaggle.com/datasets/noahx1/elden-ring-steam-reviews
ELDEN_RING_DATASET_PATH = "data/elden_ring_data.csv"

# Read in the datasets
game_spot_data = pd.read_csv(GAME_SPOT_DATASET_PATH)
steam_data = pd.read_csv(STEAM_DATASET_PATH)
elden_ring_data = pd.read_csv(ELDEN_RING_DATASET_PATH)

In [3]:
# Lets only keep the columns we want
game_spot_cols = ['Review link','Rating/10']
if('Review' in game_spot_data.columns):
    game_spot_cols.append('Review')
steam_cols = ['review_text','review_score']
elden_ring_cols = ['voted_up','review']

game_spot_data = game_spot_data[game_spot_cols]
steam_data = steam_data[steam_cols]
elden_ring_data = elden_ring_data[elden_ring_cols]

# Lets rename some columns and change them to be consistant
steam_data['review_score'].replace({1:True,-1:False},inplace=True)
steam_data.rename(columns={'review_score':'up_vote','review_text':'Review'}, inplace=True)
elden_ring_data.rename(columns={'voted_up':'up_vote','review':'Review'},inplace=True)

# Read in reviews from web (for game spot data)

In [4]:
# Now lets define how to get the text of the reviews from the web
from multiprocessing.dummy import Pool as ThreadPool
def ScrapeSingle(link): 
    system('cls')
    print(time.time())
    try:
#        print("Scraping Single")
        page = urllib.request.urlopen(link).read()
        page = BeautifulSoup(page)
        review = ""
        body = page.find(class_="article-body typography-format")
        paragraphs = body.find_all("p")
        if(len(paragraphs)==0):
            raise Exception('NO REVIEW TEXT FOUND')
        for p in paragraphs:
            review+=p.text+" "
        return review
    except:
#        print("Scraping Single")
        return None

        
def GetReviewsFromWebParallel(num_threads=2048):
    print("In Parallel")
    num_processed = 1
    prev_time = time.time()
    check_interval = 5    
    num_links = len(game_spot_data.index)
    print("Making pool")
    pool = ThreadPool(num_threads)
    print("Mapping Pool")
    results = pool.map(ScrapeSingle, game_spot_data['Review link'])
    print("Closing Pool")
    pool.close()
    print("Joining Pool")
    pool.join()
    print("Pool joined.")
    reviews = []
    failed_links = []
    print("Entering for loop with zip")
    for r, link in zip(results, game_spot_data['Review link']):
               # Display progress
        num_processed = num_processed + 1
        if(num_processed % check_interval == 0):
            prediction = (time.time() - prev_time)/(check_interval) * (num_links-len(reviews))
            prev_time = time.time()
            system('cls')
            clear_output(wait=True)
            print(f"Reviews lost: {len(failed_links)}")
            print(f"processed: {num_processed} / {num_links}")
            print(f"Time left: {int(prediction)} s")  
        if r is None:
            print("No review")
            reviews.append(None)
            failed_links.append(link)
        else:
            print("Appending review")
            reviews.append(r)  
    game_spot_data['Review'] = reviews
        
def GetReviewsFromWeb():
    num_processed = 1
    prev_time = time.time()
    num_links = len(game_spot_data.index)
    check_interval = 5
    reviews = []
    failed_links = []
    for link in game_spot_data['Review link']:
        # Try to get the review text
        try:
            page = urllib.request.urlopen(link).read()
            page = BeautifulSoup(page)
            review = ""
            body = page.find(class_="article-body typography-format")
            paragraphs = body.find_all("p")
            if(len(paragraphs)==0):
                raise Exception('NO REVIEW TEXT FOUND')
            for p in paragraphs:
                review+=p.text+" "
            reviews.append(review)
        # If we could not get the review text
        except:
            reviews.append(None)
            failed_links.append(link)
        # Display progress
        num_processed = num_processed + 1
        if(num_processed % check_interval == 0):
            prediction = (time.time() - prev_time)/(check_interval) * (num_links-len(reviews))
            prev_time = time.time()
            system('cls')
            clear_output(wait=True)
            print(f"Reviews lost: {len(failed_links)}")
            print(f"processed: {num_processed} / {num_links}")
            print(f"Time left: {int(prediction)} s")  
    # Update dataframe and write to file
    game_spot_data['Review'] = reviews
    

In [None]:
# If gamespot data does not have review then we need to get it from the web
if(not 'Review' in game_spot_data.columns):
    print("No reviews. Calling Parallel")
    GetReviewsFromWebParallel()
    print("Called Parallel")
    game_spot_data.to_csv(GAME_SPOT_DATASET_PATH, index=False)

No reviews. Calling Parallel
In Parallel
Making pool
Mapping Pool
1681848333.0258036
1681848333.0563226
1681848333.0723262
1681848333.073326
1681848333.073326
1681848333.073326
1681848333.074326
1681848333.074326
1681848333.074326
1681848333.074326
1681848333.074326
1681848333.074326
1681848333.074326
1681848333.0753262
1681848333.0753262
1681848333.09833171681848333.1143365
1681848333.1153352
1681848333.130339
1681848333.150854
1681848333.1959286

1681848333.2536197
1681848333.3195176
1681848333.365039
1681848333.365039
1681848333.365039
1681848333.389045
1681848333.4645758
1681848333.5065851
1681848333.5257957
1681848333.5257957
1681848333.586203
1681848333.8133957
1681848334.2196276
1681848337.0764549
1681848337.124015
1681848337.1545298
1681848337.1555302
1681848337.1555302
1681848337.1658435
1681848337.1765747
1681848337.1765747
1681848337.1869757
1681848337.2128477
1681848337.2325175
1681848337.2973528
1681848337.2973528
1681848337.3266294
1681848337.3266294
1681848337.3564043
16

1681848352.96446731681848352.9768336
1681848353.0139048
1681848353.0139048
1681848353.0219684
1681848353.0219684
1681848353.043145
1681848353.043145
1681848353.043145
1681848353.0642915
1681848353.0642915
1681848353.0642915
1681848353.076281
1681848353.0843804
1681848353.0843804
1681848353.1042922
1681848353.1255007
1681848353.1494691
1681848353.1494691
1681848353.1699245
1681848353.1779702
1681848353.1779702
1681848353.1779702
1681848353.1779702
1681848353.2066286
1681848353.2066286
1681848353.2384067
1681848353.2384067
1681848353.2384067
1681848353.2626176
1681848353.2626176
1681848353.2626176
1681848353.2888155
1681848353.2888155
1681848353.3129554
1681848353.3129554
1681848353.332689
1681848353.349724
1681848353.3669095
1681848353.4121485
1681848353.4121485
1681848353.4216118
1681848353.4216118
1681848353.4216118
1681848353.42874
1681848353.46864
1681848353.46864
1681848353.5301516
1681848353.5301516
1681848353.5551593
1681848353.5621614
1681848353.583805
1681848353.583805
16818483

1681848366.81906751681848366.8204567
1681848366.843587
1681848366.843587
1681848366.8946424
1681848366.912294
1681848366.9867895
1681848366.9981682
1681848367.0181792
1681848367.0631225
1681848367.0631225
1681848367.1426978
1681848367.1856704
1681848367.23547
1681848367.2454886
1681848367.2454886
1681848367.2454886
1681848367.2464888
1681848367.2629888
1681848367.2639925
1681848367.2639925
1681848367.3355443
1681848367.3355443
1681848367.3506765
1681848367.3506765
1681848367.392189
1681848367.4297316
1681848367.437248
1681848367.4678562
1681848367.4698713
1681848367.492169
1681848367.5021996
1681848367.5021996
1681848367.5409696
1681848367.55762
1681848367.55762
1681848367.6108673
1681848367.6261513
1681848367.6261513
1681848367.6500018
1681848367.6500018
1681848367.7129567
1681848367.7139578
1681848367.7566702
1681848367.7566702
1681848367.7566702
1681848367.8143947
1681848367.8325706
1681848367.8668146
1681848367.8668146
1681848367.9077091
1681848367.9537199
1681848367.9717245
168184

1681848623.0154412
1681848628.70120221681848629.27803421681848629.3259265

1681848629.6326647

1681848630.3173928
1681848630.347498
1681848630.411252
1681848637.3728747
1681848638.71528671681848638.71638

1681848638.8565395
1681848638.8575425
1681848638.9187212
1681848639.1235433
1681848639.2005308
1681848639.242244
1681848639.311462
1681848639.5169988
1681848639.7007942
1681848640.265659
1681848640.2959025
1681848648.8957205
1681848651.665949
1681848702.7592149
1681848706.2217581
1681848711.0302439
1681848711.2651634
1681848718.8905225
1681848718.9851758
1681848719.10877
1681848719.2984734
1681848719.5180109
1681848719.5810504
1681848719.5810504
1681848719.6585505
1681848719.9860811
1681848720.126841
1681848720.1756403
1681848720.1756403
1681848720.1756403
1681848720.2353182
1681848720.3622823
1681848720.517827
1681848721.0340629
1681848721.1110687
1681848721.11334
1681848721.1741211
1681848721.1908662
1681848721.6583865
1681848721.9062974
1681848721.9785807
1681848722.217597
16818487

1681848965.93086961681848965.9647748
1681848966.7502925

1681848968.5889804
1681848969.6830134
1681848969.7792482
1681848969.9497323
1681848970.0502422
1681848970.0502422
1681848970.0771108
1681848970.1255257
1681848970.5466373
1681848970.6753824
1681848971.1636577
1681848971.2870586
1681848971.3385525
1681848971.385525
1681848971.4016852
1681848971.621419
1681848971.6684637
1681848971.8244748
1681848971.8560967
1681848971.9486506
1681848971.950181
1681848972.1200442
1681848972.2771544
1681848972.3568077
1681848972.3817463
1681848972.4031112
1681848972.5286262
1681848972.701233
1681848972.7505665
1681848972.8748608
1681848972.906282
1681848973.0148592
1681848973.3448043
1681848974.20934081681848974.3339534
1681848974.3499277
1681848974.350979
1681848974.5664802
1681848974.625333
1681848974.625333

1681848975.073202
1681848975.6962
1681848976.3109875
1681848978.9230132
1681848979.78054261681848980.2024338

1681848980.3587346
1681848980.5767725
1681848980.5921452
1681848980.9560037
16818

1681849210.4710743
1681849212.475756
1681849212.6302867
1681849212.866957
1681849212.8980012
1681849212.9143753
1681849212.960676
1681849212.9760258
1681849212.988515
1681849213.0088027
1681849213.037134
1681849213.0522797
1681849213.0870543
1681849213.3073173
1681849213.324343
1681849213.4663017
1681849213.4762762
1681849213.5143
1681849213.5320227
1681849213.5616133
1681849213.5616133
1681849213.7784376
1681849213.841599
1681849213.9821515
1681849213.9972498
1681849214.122455
1681849214.647235
1681849214.6852043
1681849214.7028205
1681849214.8933823
1681849214.9030018
1681849214.965122
1681849214.983947
1681849215.2008796
1681849215.2643535
1681849215.266896
1681849215.7506669
1681849216.1295688
1681849216.303694
1681849216.3333278
1681849216.66258
1681849216.866489
1681849216.9911394
1681849218.2068706
1681849219.3984413
1681849220.2388265
1681849221.4487965
1681849221.686529
1681849222.092466
1681849222.3630834
1681849222.3965497
1681849222.6445518
1681849222.759579
1681849222.9180

1681849353.32877561681849353.3735034

1681849353.436577
1681849353.627511
1681849353.6414244
1681849353.6710243
1681849353.7805452
1681849353.8907037
1681849354.2661452
1681849354.37407
1681849354.4400787
1681849354.5225708
1681849354.828513
1681849355.1389878
1681849356.330962
1681849357.237499
1681849357.7434046
1681849358.697506
1681849358.7323027
1681849360.9114227
1681849361.3380277
1681849362.5636694
1681849362.5792193
1681849362.5802464
1681849362.7151873
1681849362.794528
1681849362.9300585
1681849363.0589192
1681849363.0624912
1681849363.1207471681849363.1548834
1681849363.216269
1681849363.2316265
1681849363.2776074
1681849363.3565867
1681849363.5186055

1681849363.6269841
1681849363.7682877
1681849363.9582222
1681849364.675547
1681849364.7527888
1681849365.0219748
1681849365.069342
1681849365.6677647
1681849366.165021
1681849366.5034516
1681849371.0743515
1681849372.014604
1681849372.2857037
1681849372.6452062
1681849372.9416108
1681849372.9618738
1681849373.0961587
16818493

1681849513.8300341681849513.9734743
1681849514.0187054
1681849514.0519822
1681849514.1677396
1681849514.6278536

1681849514.733313
1681849514.7702737
1681849515.0502443
1681849515.0992439
1681849515.2342207
1681849515.4599214
1681849516.0512779
1681849516.383775
1681849516.8221557
1681849516.85225
1681849517.776042
1681849517.7933273
1681849518.088581
1681849518.3397558
1681849518.9001205
1681849518.9910643
1681849519.6022964
1681849520.2250392
1681849520.4154606
1681849520.7238927
1681849522.76015351681849522.890123
1681849522.9194002

1681849523.089783
1681849523.5672615
1681849524.04494
1681849524.1398077
1681849524.3233657
1681849524.4223542
1681849524.46376
1681849524.4966908
1681849524.580068
1681849524.7842498
1681849524.8290555
1681849525.0122812
1681849525.199293
1681849526.4062455
1681849527.25294381681849527.2679648
1681849527.3777838

1681849527.848417
1681849527.913041
1681849528.1955018
1681849528.7583892
1681849530.384467
1681849533.0846958
1681849533.2690735
1681849533.

1681849723.9450771681849723.9759731

1681849724.1340008
1681849724.3383918
1681849724.3534136
1681849724.591708
1681849724.9315648
1681849724.9936903
1681849725.0104465
1681849725.011472
1681849725.5099132
1681849725.8433363
1681849725.8585656
1681849726.0635526
1681849726.092867
1681849726.1089985
1681849726.5331748
1681849727.266885
1681849727.5579166
1681849729.77056481681849730.0369456
1681849730.066658
1681849730.9738162

1681849731.2559261
1681849731.297438
1681849731.2984538
1681849732.4265265
1681849732.551495
1681849732.599521
1681849732.6462572
1681849733.65399
1681849733.8735733
1681849733.9514346
1681849734.5961106
1681849734.6283731
1681849734.7040322
1681849734.889429
1681849734.9056938
1681849735.1211936
1681849735.2960546
1681849735.2960546
1681849735.4838903
1681849735.5005991
1681849735.5171053
1681849735.8440251
1681849735.8607588
1681849735.9706202
1681849736.0350184
1681849736.0975535
1681849736.3208635
1681849736.3964155
1681849736.4742103
1681849736.4901934
16818

1681849878.8718705
1681849880.408789
1681849881.03455
1681849881.6813247
1681849882.4362402
1681849883.0425677
1681849884.249184
1681849884.7563014
1681849885.8359382
1681849886.1802807
1681849886.2089922
1681849887.091962
1681849891.2770994
1681849891.632726
1681849893.2070644
1681849893.5087419
1681849893.585441
1681849893.883333
1681849894.573579
1681849894.8411865
1681849894.9532645
1681849894.9547057
1681849894.99723
1681849895.52338051681849895.8939526

1681849895.961025
1681849897.046207
1681849898.183717
1681849901.6743875
1681849901.9617012
1681849903.9924366
1681849905.499952
1681849905.595267
1681849905.7982535
1681849905.8080149
1681849906.033744
1681849906.735121681849906.973064

1681849907.4542136
1681849907.7550955
1681849908.20863
1681849909.435919
1681849910.2850702
1681849912.766701
1681849914.1792867
1681849914.508627
1681849914.6181874
1681849915.1519113
1681849915.4690928
1681849915.5162604
1681849915.5283885
1681849915.7370293
1681849916.392607
1681849916.4445636


1681850053.49503561681850054.514998

1681850054.8690379
1681850055.5129118
1681850055.799246
1681850055.8136654
1681850056.1111274
1681850056.1794999
1681850056.2073386
1681850056.210059
1681850056.4910471
1681850056.5231452
1681850056.6004767
1681850056.624189
1681850057.0856678
1681850057.115623
1681850057.5457857
1681850057.9921172
1681850058.1915367
1681850058.99847961681850059.0469518

1681850059.2451932
1681850061.4371002
1681850062.0961535
1681850063.0228834
1681850063.4808357
1681850065.47125581681850065.5042012
1681850065.924781
1681850066.0658252
1681850066.3476863

1681850066.474262
1681850066.7549684
1681850067.0855432
1681850067.2647092
1681850067.3034427
1681850067.3034427
1681850067.4147148
1681850067.4320745
1681850067.4342875
1681850067.4880722
1681850067.8273351
1681850067.9395995
1681850068.4535906
1681850069.816409
1681850069.8795183
1681850071.0650165
1681850071.84937831681850071.9584403

1681850073.4443808
1681850073.857633
1681850075.435431681850075.4694386
16818

1681850227.2177148
1681850227.674261
1681850227.8633575
1681850227.8633575
1681850228.1163914
1681850228.24234
1681850228.3416362
1681850228.5371056
1681850228.668526
1681850228.746616
1681850229.4159935
1681850229.5884483
1681850230.04191
1681850230.3069446
1681850230.4473152
1681850230.7123592
1681850231.1469598
1681850231.9297523
1681850232.2015922
1681850234.14621541681850234.162283
1681850234.5520723

1681850236.8378325
1681850236.8403332
1681850236.8535392
1681850237.2637494
1681850237.351903
1681850237.60392071681850237.7592845
1681850238.023549
1681850238.0854783
1681850238.1643357
1681850238.3728611

1681850238.6341178
1681850238.9158237
1681850238.9998202
1681850239.0872915
1681850239.154273
1681850239.1700888
1681850239.2750387
1681850239.2750387
1681850239.5111277
1681850239.6451476
1681850239.7167907
1681850239.7659836
1681850240.3852255
1681850240.5874047
1681850240.6508167
1681850241.1084616
1681850241.7044256
1681850242.1610422
1681850242.4374368
1681850242.595453
16818

1681850412.62289021681850413.0515606

1681850414.2076464
1681850414.5172684
1681850415.5533905
1681850415.8049703
1681850416.589848
1681850417.6369505
1681850417.7784593
1681850417.8578079
1681850418.0157695
1681850418.1600754
1681850418.2716756
1681850418.2855308
1681850418.6137424
1681850418.6300104
1681850418.7432427
1681850418.754705
1681850418.7563705
1681850418.8166273
1681850418.96005651681850418.9966226
1681850419.0109107

1681850419.2629743
1681850419.3220098
1681850419.38421
1681850419.3866184
1681850419.4013405
1681850419.605899
1681850419.7940168
1681850419.934915
1681850419.9659252
1681850420.0446594
1681850420.2465324
1681850420.5240693
1681850420.5261
1681850420.6522627
1681850421.249913
1681850421.2942214
1681850421.4071553
1681850421.4256208
1681850421.7070773
1681850421.8049114
1681850422.0007293
1681850422.3763413
1681850422.7213748
1681850423.4869194
1681850424.0181067
1681850424.7847943
1681850425.371131
1681850425.94905781681850426.232977

1681850426.7145276
16818

1681850584.0430923
1681850584.663271
1681850586.5698736
1681850586.900433
1681850587.71733551681850587.84447

1681850588.014429
1681850588.1230872
1681850589.0466852
1681850589.1669378
1681850591.4236751681850591.5379002

1681850592.6614306
1681850593.1216931
1681850594.3754241681850594.4704072
1681850594.5650089

1681850595.8366845
1681850597.874198
1681850598.413233
1681850599.132966
1681850601.2065911
1681850602.2343326
1681850603.855049
1681850604.70227
1681850604.8297637
1681850605.628168
1681850606.07203941681850606.1333861
1681850606.2316277
1681850606.6109068
1681850606.7665465

1681850609.6887531681850610.068243
1681850610.0692635

1681850611.29147
1681850611.339053
1681850611.8567367
1681850612.0482514
1681850614.38930231681850614.4694343
1681850614.6238484

1681850617.57834741681850617.58429

1681850619.04597331681850619.1471043
1681850619.501702

1681850619.6892798
1681850619.984658
1681850620.2887049
1681850620.318341
1681850620.4326665
1681850620.8877904
1681850621.380666

1681850773.701511
1681850774.1478336
1681850775.3287163
1681850776.158482
1681850778.74058561681850779.105951

1681850779.6555083
1681850780.166691
1681850780.1822045
1681850780.2027662
1681850780.3257997
1681850780.3429875
1681850780.3574545
1681850780.6771047
1681850780.7211657
1681850780.7425838
1681850780.8151255
1681850781.0022569
1681850781.0210416
1681850781.0322177
1681850781.0797772
1681850781.1140385
1681850781.115602
1681850781.1689897
1681850781.2680137
1681850781.4068744
1681850781.4255173
1681850781.644909
1681850781.6771066
1681850781.6944566
1681850781.6968439
1681850781.9376338
1681850781.9789982
1681850782.401221681850782.6990132

1681850783.4661412
1681850783.7347941
1681850788.5611548
1681850788.8809688
1681850789.396479
1681850789.90243741681850789.9108188
1681850789.970338
1681850790.0043004
1681850790.0333838
1681850790.0652344
1681850790.0764556
1681850790.1243458

1681850790.3001168
1681850790.376714
1681850790.5075536
1681850790.5728004
1681850790.5846465
1681

1681850905.3600407
1681850907.38281
1681850907.792537
1681850909.3955624
1681850910.247843
1681850910.2650363
1681850910.7129185
1681850910.904808
1681850911.0632808
1681850911.0754926
1681850911.4443595
1681850912.35247641681850912.657242

1681850913.1263769
1681850913.8666523
1681850914.9336689
1681850915.5718446
1681850915.976929
1681850916.4067802
1681850916.5288935
1681850918.41424161681850918.4332836

1681850918.9198735
1681850920.3846116
1681850920.4329646
1681850920.436778
1681850920.50195
1681850920.742756
1681850921.3190017
1681850921.551862
1681850921.5889325
1681850922.5715551
1681850922.821177
1681850923.5849564
1681850923.6418192
1681850924.3189735
1681850925.4172275
1681850926.32009
1681850929.889758
1681850930.2325811
1681850930.2801747
1681850930.2990167
1681850930.3146973
1681850930.7726738
1681850931.2729552
1681850931.9937675
1681850935.4071429
1681850939.5462935
1681850940.4174175
1681850940.496481
1681850941.8417845
1681850942.7893636
1681850942.824612
1681850942.

1681851070.29561381681851070.417579
1681851070.4807954
1681851070.697361
1681851070.8258665

1681851071.351252
1681851071.5558655
1681851071.7647655
1681851071.9812346
1681851072.1835876
1681851072.97736451681851073.18118

1681851073.307811
1681851074.2177687
1681851074.2509315
1681851074.597562
1681851074.8843167
1681851075.4365723
1681851076.1526926
1681851076.3625665
1681851077.0113633
1681851077.2621014
1681851077.3849676
1681851077.4002812
1681851077.5112047
1681851077.6683178
1681851078.2723613
1681851078.842651681851079.2803829
1681851079.3103483

1681851080.75685881681851083.8727956
1681851083.937087

1681851084.2442682
1681851084.276362
1681851084.4230814
1681851084.9281805
1681851085.1917408
1681851085.5028608
1681851085.9105005
1681851086.00011871681851086.0753653

1681851086.5910132
1681851086.9442408
1681851086.99595
1681851089.15874171681851089.2863324
1681851089.3429902

1681851090.0555153
1681851090.7290764
1681851090.8082244
1681851090.8738823
1681851090.9390056
168185

1681851228.18037751681851228.2410643
1681851228.4139817
1681851228.570112

1681851229.1852996
1681851230.2598667
1681851230.6097577
1681851230.63775
1681851230.9142358
1681851231.2863817
1681851231.36494851681851231.5815463

1681851232.1039488
1681851235.5511308
1681851235.5927658
1681851236.2898064
1681851236.8587847
1681851237.174583
1681851237.3171122
1681851237.4225113
1681851237.5945387
1681851237.7577412
1681851237.9592407
1681851238.4066732
1681851238.5478613
1681851238.7533667
1681851239.3766477
1681851239.379542
1681851239.5859182
1681851239.5889604
1681851239.6534607
1681851239.734216
1681851240.2903607
1681851241.0313656
1681851241.171477
1681851241.55482861681851241.5700688
1681851241.7565632
1681851241.7580662
1681851241.833618

1681851242.0934126
1681851242.2786338
1681851242.3901932
1681851242.5005033
1681851242.9591804
1681851243.2153618
1681851243.4933562
1681851243.5086894
1681851243.5438957
1681851243.5560226
1681851243.9777941681851244.0806687

1681851244.4280071
16

1681851355.2520459
1681851355.7655518
1681851356.022793
1681851356.1645029
1681851356.359549
1681851356.4963434
1681851357.0322521681851357.052795
1681851357.1288602
1681851357.1475391
1681851357.2884831

1681851357.3813207
1681851357.3845284
1681851357.876176
1681851358.0820243
1681851358.136421
1681851358.1949537
1681851358.4633942
1681851358.4638968
1681851358.479546
1681851358.704927
1681851358.717686
1681851358.9086657
1681851359.0354192
1681851359.5459363
1681851359.7015324
1681851360.0981297
1681851360.2390578
1681851360.7143548
1681851361.034243
1681851361.085223
1681851361.2230568
1681851361.5685718
1681851361.8225412
1681851362.1985729
1681851362.3004656
1681851362.4051712
1681851362.6638503
1681851362.9357731
1681851363.0927675
1681851363.1522715
1681851363.2800994
1681851363.407201
1681851363.685777
1681851363.7674077
1681851363.7834964
1681851363.8275485
1681851364.457344
1681851365.0087691681851365.0932548
1681851365.1350272

1681851365.4815588
1681851365.689587
168185136

1681851506.7784984
1681851506.9691496
1681851507.0415058
1681851507.567811
1681851507.6306326
1681851507.7546537
1681851508.6114633
1681851509.049887
1681851509.2996604
1681851509.459016
1681851510.60762261681851511.3302703

1681851513.3533669
1681851513.3640661
1681851513.7018688
1681851513.7662656
1681851514.1746976
1681851514.2756205
1681851514.2991364
1681851514.5051658
1681851515.0326498
1681851515.347449
1681851515.3606482
1681851515.5952113
1681851515.8435028
1681851515.96776721681851515.9697673

1681851516.6941957
1681851517.1060843
1681851517.4946973
1681851518.4924676
1681851518.554777
1681851518.617907
1681851518.757846
1681851518.8065944
1681851518.8704371
1681851519.54558871681851519.6193984
1681851520.0353954

1681851520.2884786
1681851520.7802968
1681851521.6765184
1681851522.028769
1681851522.2601159
1681851522.2700496
1681851522.4188504
1681851523.1867867
1681851523.5516493
1681851523.8397646
1681851524.1236537
1681851524.150153
1681851524.3379595
1681851524.561844
168

1681851659.67168971681851659.7244148

1681851660.0885413
1681851660.2577376
1681851661.253042
1681851661.7248645
1681851662.300546
1681851662.7552123
1681851663.2118192
1681851663.4250526
1681851663.613418
1681851663.7069275
1681851664.281467
1681851664.9706964
1681851665.0022395
1681851665.0032399
1681851665.8747537
1681851666.3224635
1681851666.98329351681851667.1852648

1681851667.2952237
1681851667.6791995
1681851668.0932806
1681851668.4116325
1681851668.8022358
1681851669.0241969
1681851669.2510877
1681851669.468527
1681851669.5267909
1681851670.37134841681851670.5113199
1681851670.8617613

1681851671.2502038
1681851671.2712228
1681851671.9448948
1681851672.9894323
1681851673.5774372
1681851673.6594603
1681851673.8573778
1681851673.9589484
1681851674.7453391681851674.9355874

1681851674.9551136
1681851675.3124993
1681851675.6454546
1681851675.88705
1681851675.8906074
1681851676.37168
1681851676.61848
1681851677.0981352
1681851677.6020684
1681851677.98274251681851678.0794992
168185

1681851794.66446421681851794.7891057
1681851794.835369
1681851794.85328
1681851794.8700829
1681851795.2204924
1681851795.2544394

1681851795.647103
1681851795.8202586
1681851795.8836472
1681851796.654928
1681851796.700345
1681851797.0664322
1681851797.0803556
1681851797.8740668
1681851798.5596828
1681851798.87414311681851798.8767009

1681851799.1242092
1681851799.3160212
1681851799.4619358
1681851799.5682023
1681851802.52635861681851803.128167
1681851803.362986
1681851803.4275303

1681851803.6301205
1681851803.775285
1681851803.8558211
1681851803.8849363
1681851804.245864
1681851804.3310935
1681851804.4522314
1681851804.4978404
1681851804.5610015
1681851804.5939062
1681851804.655007
1681851804.8530378
1681851804.8540587
1681851804.8890777
1681851804.9199774
1681851808.2448628
1681851808.3878498
1681851808.440991
1681851808.4920785
1681851808.6685503
1681851808.7150035
1681851808.9265084
1681851808.9371555
1681851808.9371555
1681851808.9565916
1681851809.3203337
1681851809.3331954
16818

1681851942.9654486
1681851943.2256382
1681851943.327471
1681851943.7039602
1681851944.4690626
1681851944.8913748
1681851944.9544988
1681851945.0962994
1681851945.1783524
1681851945.3405044
1681851945.5933306
1681851945.8765357
1681851945.956375
1681851946.06547
1681851946.20946
1681851946.5897932
1681851947.334334
1681851947.4855504
1681851947.6741889
1681851948.4131784
1681851948.6193426
1681851948.6814673
1681851948.7239332
1681851948.7417216
1681851949.1824586
1681851953.71809581681851954.3642197
1681851954.4071076
1681851955.0239818
1681851955.0422137
1681851955.107759
1681851955.1185443
1681851955.1374092
1681851955.2450762
1681851955.3407376
1681851955.4248693
1681851955.4521534
1681851955.5328412

1681851956.191047
1681851956.5445395
1681851957.3655553
1681851957.4604485
1681851958.779825
1681851960.9617317
1681851961.0877726
1681851961.3935864
1681851962.0264804
1681851962.6393383
1681851963.0280745
1681851963.0833244
1681851963.6357079
1681851963.910977
1681851963.9872458
1681

1681852103.55399681681852103.6405194
1681852103.7187364

1681852104.1184623
1681852104.5860584
1681852104.8200557
1681852104.8530216
1681852104.9945743
1681852105.0599134
1681852105.0971403
1681852105.151675
1681852105.1526754
1681852105.2360039
1681852105.425448
1681852105.4276981
1681852105.4618669
1681852105.5211396
1681852105.87764
1681852105.9421928
1681852106.0082898
1681852106.0521395
1681852106.1718981
1681852106.1787827
1681852106.2233148
1681852106.240381
1681852106.2556639
1681852106.3798492
1681852106.632298
1681852106.7124856
1681852106.9471717
1681852107.7999621681852107.8285594

1681852108.8083112
1681852110.21479871681852110.2629075

1681852111.0664687
1681852111.5569942
1681852112.1368
1681852112.2452207
1681852112.5596936
1681852113.4292524
1681852113.6348224
1681852114.85705691681852114.9077973
1681852114.966961
1681852115.1729856

1681852115.2729719
1681852115.2739947
1681852115.3351972
1681852115.411077
1681852115.5196037
1681852115.7763588
1681852115.8283455
16818

1681852348.6925836
1681852354.5806108
1681852354.779063
1681852355.89302951681852356.067995
1681852356.4132173
1681852356.490165
1681852356.5838392

1681852357.2002137
1681852357.2906463
1681852357.3719923
1681852357.8781695
1681852359.1834192
1681852359.4047794
1681852360.4680605
1681852361.0972118
1681852361.9720922
1681852362.2768552
1681852362.7898784
1681852364.3177826
1681852364.4451199
1681852364.598661
1681852364.6012225
1681852364.7486696
1681852366.4760431681852366.680827
1681852366.6997354

1681852366.9643083
1681852367.0609484
1681852367.1440737
1681852367.4918168
1681852367.5055413
1681852367.7791238
1681852367.89637
1681852367.995899
1681852368.2163334
1681852368.658652
1681852369.3163911681852369.6881502
1681852369.861688

1681852371.317209
1681852371.7373557
1681852373.7760875
1681852374.4704306
1681852374.9966671
1681852376.3699396
1681852376.4112573
1681852376.5143733
1681852376.836722
1681852376.9078796
1681852376.9118805
1681852377.0535283
1681852377.3223975
1681852

# Now lets define our classifications (Sentiment)

In [None]:
# We define 2 sentiments to classify
GOOD = 'Good'
BAD = 'Bad'

In [None]:
def GetSentimentFromRating(rating):
    if(rating is None):
        return None
    if(rating > 7):
        return GOOD
    if(rating > 0):
        return BAD

In [None]:
def GetSentimentFromUpVote(up_vote):
    if(up_vote):
        return GOOD
    else:
        return BAD

In [None]:
def AddSentiment(df):
    s = []
    if('Rating/10' in df.columns):
        for rating in game_spot_data['Rating/10'].to_list():
            s.append(GetSentimentFromRating(rating))
    elif('up_vote' in df.columns):
        for upvote in df['up_vote'].to_list():
            s.append(GetSentimentFromUpVote(upvote))
    else:
        raise Exception(f"No column in dataframe to use for sentiment! {df.columns}")
    df['Sentiment'] = s
    print("DATA DISTRIBUTION:")
    SENTIMENTS = df['Sentiment'].unique()
    for s in SENTIMENTS:
        print(f"{s}: {sum(1 for i in df['Sentiment'] if i == s)}")

In [None]:
AddSentiment(game_spot_data)
AddSentiment(steam_data)
AddSentiment(elden_ring_data)

# Clean data

In [None]:
def CleanData(df):
    # Now lets finish cleaning the data by dropping any invalid rows
    old_size = df.shape
    df.dropna(inplace=True)
    new_size = df.shape
    print("Dropping rows")
    print(f"Old count: {old_size[0]}")
    print(f"New count: {new_size[0]}")
    print(f"Removed {old_size[0]-new_size[0]} rows.")
    

In [None]:
CleanData(game_spot_data)
CleanData(steam_data)
CleanData(elden_ring_data)

# Now we split our data

In [None]:
def SplitData(df,test_split=0.3):
    #split datasets
    X = df['Review']
    y = df['Sentiment'] 
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = test_split, random_state=1) 
    X_train.to_list()
    X_test = X_test.to_list()
    y_train = y_train.to_list()
    y_test = y_test.to_list()
    return X_train, X_test, y_train, y_test 

In [None]:
game_spot_X_train, game_spot_X_test, game_spot_y_train, game_spot_y_test = SplitData(game_spot_data)
steam_X_train, steam_X_test, steam_y_train, steam_y_test = SplitData(steam_data)
elden_ring_X_train, elden_ring_X_test, elden_ring_y_train, elden_ring_y_test = SplitData(elden_ring_data)

## Vectorize the data
We will be using a method called **Term Frequency and Inverse Document Frequency (TF-IDF)**. 
Here is how it works: https://medium.com/@vasista/preparing-the-text-data-with-scikit-learn-b31a3df567e

#### sklearn supplies an easy way to implement this

In [None]:
def CreateVectorizer(X_train):
    vectorizer = TfidfVectorizer(min_df = 5,max_df = 0.8,sublinear_tf = True,use_idf = True)
    train_vectors = vectorizer.fit_transform(X_train)
    return vectorizer, train_vectors

# Creating svm
#### Helpful link to understand how this works:
- https://medium.com/@vasista/sentiment-analysis-using-svm-338d418e3ff1



In [None]:
def TestSVM(svm_linear, vectorizer, X_test, y_test): 
    print('Testing SVM ...')
    start_time = time.time()
    test_vectors = vectorizer.transform(X_test)
    prediction = svm_linear.predict(test_vectors)
    end_time = time.time()
    print(f"Tested SVM in {end_time-start_time} s")
    report = classification_report(y_test, prediction, output_dict=True, zero_division=0)
    cm = confusion_matrix(y_test, prediction)
    cm_disp = ConfusionMatrixDisplay(confusion_matrix=cm,display_labels = svm_linear.classes_)
    cm_disp.plot()
    for key in report.keys():
        print(f'{key}: {report[key]}\n')
    return report

In [None]:
def CreateSVM(x_train, y_train):
    print('training SVM...')
    start_time = time.time()
    vectorizer, train_vectors = CreateVectorizer(x_train)
    svm_linear = svm.LinearSVC()
    start_time = time.time()
    svm_linear.fit(train_vectors, y_train)
    end_time = time.time()
    print(f"Trained SVM in {end_time-start_time} s")
    return svm_linear, vectorizer

# Train SVM with game spot data

In [None]:
game_spot_svm, game_spot_vectorizer = CreateSVM(game_spot_X_train, game_spot_y_train)

### Test game_spot_svm on game_spot test data

In [None]:
report = TestSVM(game_spot_svm, game_spot_vectorizer, game_spot_X_test, game_spot_y_test)

### Test game_spot_svm on steam data

In [None]:
report = TestSVM(game_spot_svm, game_spot_vectorizer, steam_X_test, steam_y_test)

### Test game_spot_svm on elden ring data

In [None]:
report = TestSVM(game_spot_svm, game_spot_vectorizer, elden_ring_X_test, elden_ring_y_test)

# Train SVM with elden ring data

In [None]:
elden_ring_svm, elden_ring_vectorizer = CreateSVM(elden_ring_X_train, elden_ring_y_train)

### Test elden_ring_svm on elden ring test data

In [None]:
report = TestSVM(elden_ring_svm, elden_ring_vectorizer, elden_ring_X_test, elden_ring_y_test)

### Test elden_ring_svm on steam data

In [None]:
report = TestSVM(elden_ring_svm, elden_ring_vectorizer, steam_X_test, steam_y_test)

### Test elden_ring_svm on game spot data

In [None]:
report = TestSVM(elden_ring_svm, elden_ring_vectorizer, game_spot_X_test, game_spot_y_test)

# Train SVM with Steam Data

In [None]:
steam_svm, steam_vectorizer = CreateSVM(steam_X_train, steam_y_train)

### Test steam_svm on steam data

In [None]:
report = TestSVM(steam_svm, steam_vectorizer, steam_X_test, steam_y_test)

### Test steam_svm on game spot data

In [None]:
report = TestSVM(steam_svm, steam_vectorizer, game_spot_X_test, game_spot_y_test)

### Test steam_svm on elden ring data

In [None]:
report = TestSVM(steam_svm, steam_vectorizer, elden_ring_X_test, elden_ring_y_test)