In [1]:
import os
from dotenv import load_dotenv
from requests import get
import requests 
import json
import pandas as pd

In [2]:
load_dotenv()
url = 'http://www.gamespot.com/api/games/?'
API_KEY = os.getenv('GAMESPOT_API_KEY')


headers = {
    'User-Agent' : 'project1', 
}

In [3]:
params = {
    'api_key': API_KEY,
    'format': 'json'
}

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',
    'content-type': 'application/json; charset=utf-8',
    'Accept': '*/*',
    'Connection': 'keep-alive'
}

In [7]:
res = get(url=url, params=params,headers=headers)
print(res)

<Response [200]>


In [8]:
res.json().get('results')

[{'release_date': '1995-09-26 12:00:00',
  'description': "Welcome to the New World Order. A gritty high tech world where the art of electronic intelligence and covert surveillance reigns supreme. Where guerrilla strategies and savage combat are the norm. Build bases, muster forces and lacerate your enemies to the bone. All for the love of power. In the hyper-real combat experience of Command & Conquer, you're thrust into the heat of an all out race for global control. Will you side with the humanistic GDI or bring the world to its knees with the Brotherhood of Nod? Decide fast. Your adversaries are without mercy. And so are you.",
  'id': 1,
  'name': 'Command & Conquer',
  'deck': 'Overall Command & Conquer is a great strategy game and easily the best available on the PlayStation.',
  'image': {'square_tiny': 'https://www.gamespot.com/a/uploads/square_tiny/mig/0/9/6/2/2210962-cncbox.jpg',
   'screen_tiny': 'https://www.gamespot.com/a/uploads/screen_tiny/mig/0/9/6/2/2210962-cncbox.jpg

In [None]:
def gitgames(limit = 100, offset=0):  
    params.setdefault('limit', limit)
    params.setdefault('offset', offset)

    
    try: 
        url = "https://www.gamespot.com/api/games/"
        response = requests.get(url, headers=headers, params=params)
        response.raise_for_status()
        data = response.json ()
        return(data)
    except requests.exceptions.HTTPError as http_err:
        print (f"HTTP error occurred: {http_err}")
    except Exception as err:
        print(f"Other error occurred: {err}")
    

In [None]:
offset = 0
total_results = 1


while offset < total_results : 
    data = gitgames(limit = 100, offset=offset)    
    total_results = data.get('number_of_total_results')
    print (offset, end='\r')
    if offset == 0 : 
        games_df = pd.DataFrame(data['results'])
    else : 
        games_df = pd.concat ([games_df, pd.DataFrame(data['results'])], axis=0)
    offset = offset + 100 


In [None]:
games_df.reset_index(drop=True, inplace=True)

In [None]:
games_df.describe()

In [None]:
games_df.head()

In [None]:
len(games_df)

In [None]:
games_df.tail()

In [None]:
#df is your dataframe
games_df.to_csv("gamespot_games.csv", sep=',', encoding='utf-8', index=False)

In [4]:
def getReviews(limit = 100, offset=0):  
    params.setdefault('limit', limit)
    params.setdefault('offset', offset)

    
    try: 
        url = 'http://www.gamespot.com/api/reviews/'
        response = requests.get(url, headers=headers, params=params)
        response.raise_for_status()
        data = response.json ()
        return(data)
    except requests.exceptions.HTTPError as http_err:
        print (f"HTTP error occurred: {http_err}")
    except Exception as err:
        print(f"Other error occurred: {err}")

In [5]:
offset = 0
total_results = 1


while offset < total_results : 
    data = getReviews(limit = 100, offset=offset)    
    total_results = data.get('number_of_total_results')
    print (f'{offset}/{total_results}', end='\r')
    if offset == 0 : 
        review_df = pd.DataFrame(data['results'])
    else : 
        review_df = pd.concat ([review_df, pd.DataFrame(data['results'])], axis=0)
    offset = offset + 100 


15800

In [None]:
review_df.reset_index(drop=True, inplace=True)

In [None]:
review_df.tail()

In [6]:
review_df['authors'].value_counts()

Gamespot Staff      2703
Jeffrey Adam        2544
Hugo Foster         2226
Chris Hudak         2067
Ron Dulin           1908
Trent Ward          1590
Jeff Sengstack       795
Tim McDonald         636
Tal Blevins          477
Jim Varner           318
Stephen Poole        159
Rebecca Anderson     159
Tim Soete            159
Kevin Mical          159
Name: authors, dtype: int64

In [None]:
review_df[['game_title', 'game_id']] = review_df['game'].apply(lambda x: pd.Series([x['name'], x['id']]))

In [None]:
review_df['id'].value_counts()

In [None]:
len(review_df)

In [None]:
games_df.head()

In [None]:

games_df.rename(columns={'id': 'game_id'}, inplace=True)
games_df.head()

In [None]:
review_df['title'].value_counts()   

In [None]:
review_df['authors'].value_counts()   

In [None]:
game_reviews = review_df.join(games_df, lsuffix='_rev', on='game_id', how='inner')

In [None]:
# set max columns to display as none    
# pd.set_option('display.max_columns', None)

display(game_reviews['game_id'].value_counts())

In [None]:
len(game_reviews)

In [None]:
game_reviews.to_csv("data/gamespot_reviews.csv", sep=',', encoding='utf-8', index=False)

In [None]:
game_reviews.columns


In [None]:
game_reviews.drop(columns=['review_id', 'game_id_rev', 'game_id', 'publish_date',
    'site_detail_url', 'game_id_rev', 'videos_api_url', 'articles_api_url', 'reviews_api_url',
    'images_api_url', 'releases_api_url', 'image', 'image_rev', 'deck_rev',
    'site_detail_url_rev', 'deck', 'update_date', 'review_type',  
    'release_date', 'name', 'lede', 'title', 'good', 'bad', 'body',
    'game', 'franchises'
    ], inplace=True)

In [None]:
game_reviews.drop(columns=['description'], inplace=True)

game_reviews.columns

In [None]:
game_reviews.reset_index(drop=True, inplace=True)

In [None]:
game_reviews.head()

In [None]:
game_reviews['releases'][0]

In [None]:
def getPlatforms (x):

    platforms = []

    try:
        for i in range(len(x)):
            if x[i]['platform'] not in platforms:
                platforms.append(x[i]['platform'])
    except:
        return []
    return platforms

In [None]:
game_reviews['platforms'] = game_reviews['releases'].apply(lambda x: getPlatforms(x)) 

In [None]:
game_reviews_bak = game_reviews.copy()

In [None]:
def getGenres (x):

    genres = []

    try:
        for i in range(len(x)):
            if x[i]['name'] not in genres:
                genres.append(x[i]['name'])
    except:
        return []
    return genres

In [None]:
game_reviews['genres'] = game_reviews['genres'].apply(lambda x: getGenres(x))

In [None]:
game_reviews

In [None]:
def getThemes (x):

    themes = []

    try:
        for i in range(len(x)):
            if x[i]['name'] not in themes:
                themes.append(x[i]['name'])
    except:
        return []
    return themes

In [None]:
game_reviews['themes'] = game_reviews['themes'].apply(lambda x: getThemes(x))

In [None]:
game_reviews.drop(columns=['releases'], inplace=True)

In [None]:
game_reviews

In [None]:
game_reviews_bak1 = game_reviews.copy()


In [None]:
game_reviews = game_reviews_bak1.copy()

In [None]:
game_reviews['genres'].value_counts()

In [None]:
game_reviews.to_csv("data/gamespot_reviews.csv", sep=',', encoding='utf-8', index=False)


In [None]:
game_reviews.drop(columns=['releases'], inplace=True)

In [None]:
game_reviews.isna().sum()

In [None]:
game_reviews.dtypes

In [None]:
# change score dtype to float
game_reviews['score'] = game_reviews['score'].astype(float)

In [None]:
game_pivot = game_reviews.pivot_table(index='game_title', columns='authors', values='score')

In [None]:
game_pivot.fillna(0, inplace=True)
game_pivot.describe()

In [None]:
from scipy.sparse import csr_matrix

In [None]:
game_reviews_bak = game_reviews.copy()

In [None]:
# set max display rows to none
pd.set_option('display.max_rows', None)

game_reviews[['game_title', 'authors']].value_counts()

In [None]:
game_reviews['genres'].value_counts()

In [None]:
game_reviews['game_title'].value_counts()

In [None]:
game_reviews['themes'].value_counts()

In [None]:
game_reviews['platforms'].value_counts()

In [None]:
game_reviews[['authors', 'game_title']].drop_duplicates(inplace=True)

In [None]:
game_reviews.drop_duplicates(subset=['authors', 'game_title'], inplace=True)

In [None]:
review_number = game_reviews.groupby('game_title')['authors'].count().reset_index()
review_number.rename(columns={'authors': 'number_of_reviews'}, inplace=True)

display(review_number.head())
display(review_number.tail())

In [None]:
display(review_number.describe())

In [None]:
game_sparse = csr_matrix(game_pivot)

In [None]:
from sklearn.neighbors import NearestNeighbors
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

knn_ball_tree = NearestNeighbors(n_neighbors=5, algorithm='ball_tree')
knn_brute = NearestNeighbors(algorithm='brute')

X = game_sparse
y = game_sparse

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

knn_ball_tree.fit(X_train)

y_ball_tree = knn_ball_tree.kneighbors(X_test)