## Recommendation System
This notebook will call in data that we cleaned in our previous notebook. We will then tranform the text data into readable form and run cosine similarities on our games.

***
## imports 
Import necessary packages and call in pickle file 

In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import matplotlib.pyplot as plt
import pickle

import nltk
from nltk.corpus import stopwords 
from nltk.tokenize import word_tokenize 
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

from datetime import date

In [None]:
df = pd.read_pickle("data/simple.pkl")

***
## Build Rec

#### This next function takes in our dataframe and compute the similarities between the game

In [4]:
def get_similarity(df):
    stopwords = nltk.corpus.stopwords.words('english')
    # the mature content section says these words frequently:
    newStopWords = ['description', 'developers', 'describe']
    stopwords.extend(newStopWords)

    count = CountVectorizer(stop_words=newStopWords)
    count_matrix = count.fit_transform(df['soup'])
    
    # Compute the cosine similarity matrix based on the count_matrix
    cosine_sim = cosine_similarity(count_matrix, count_matrix)
    
    # reset index of the main df and create a reverse mapping
    df = df.reset_index()
    indices = pd.Series(df.index, index = df['name'])
    
    return cosine_sim, indices

#### This next function takes in game title as input and outputs the 10 most similar games

In [5]:
def get_recommendations(name, df):
    
    cosine_sim, indices = get_similarity(df)
    
    # Get the index of the game that matches the name
    idx = indices[name]

    # Get the pairwsie similarity scores of all games with that game
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the games based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # make a df to put place scores and game names
    recs_df = pd.DataFrame() 
    
    # Get the scores of the 10 most similar movies
    sim_scores = sim_scores[1:6]
    recs_df['score'] = [i[1] for i in sim_scores]

    # Get the movie indices
    game_indices = [i[0] for i in sim_scores]
    
    # put the game info in the df
    games = []
    est_revenues = []
    seasons = []
    years = []
    for i in game_indices:
        game = df['name'].iloc[i]
        games.append(game)
        
        est_rev = df['est_revenue'].iloc[i]
        est_revenues.append(est_rev)
        
        season = df['season'].iloc[i]
        seasons.append(season)
        
        year = df['year'].iloc[i]
        years.append(year)
        
    recs_df['game'] = games
    recs_df['est_revenue'] = est_revenues
    recs_df['season'] = seasons
    recs_df['year'] = years
    
    recs_df = recs_df.reset_index()
    
    return recs_df

#### This fucntion will take in a game and our df. Then using the 10 most similar games and looking at the revenue and season release of each game it will out put the recommended season to release the game entered for the best revenue.

In [6]:
def get_season(name, df):
    rec_df = get_recommendations(name, df)
    
    season = 'none'
    
    year = date.today().year
    oldest = year - 20 
    rec_df = rec_df[rec_df.year >= oldest]
    
    if rec_df.est_revenue.idxmax() == 0:
        season = rec_df.loc[[0], 'season'].iloc[0]
        
    else: 
        rec_df = rec_df.sort_values(by=['est_revenue', 'score'], ascending=False)
        rec_df = rec_df.reset_index(drop=True)
        season = rec_df.loc[[0], 'season'].iloc[0]
    
    return season 

In [7]:
get_recommendations('counterstrike', df)

Unnamed: 0,index,score,game,est_revenue,season,year
0,0,0.609567,team fortress classic,29925000.0,spring,1999
1,1,0.57117,counterstrike source,107850000.0,fall,2004
2,2,0.515841,tannenberg,2323500.0,fall,2017
3,3,0.498077,intruder,154900.0,spring,2019
4,4,0.497346,ricochet,29925000.0,fall,2000


In [8]:
get_season('counterstrike', df)

'fall'

In [9]:
get_recommendations('halflife opposing force', df)

Unnamed: 0,index,score,game,est_revenue,season,year
0,0,0.651912,call of duty modern warfare 2,149925000.0,fall,2009
1,1,0.596694,call of duty 4 modern warfare,29985000.0,fall,2007
2,2,0.58842,halflife blue shift,29925000.0,summer,2001
3,3,0.587793,star wars battlefront 2 classic 2005,25165000.0,summer,2009
4,4,0.58284,halflife,53925000.0,fall,1998


In [10]:
get_season('halflife opposing force', df)

'fall'

#### (This is recommending when to release a game that has already been out and is in out dataframe. Look at flask app to see how we would do this using a new game that a user can input.)