# Game Review

### Import all the necessary library

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import tflearn
import re
from tflearn.data_utils import to_categorical, pad_sequences
from sklearn.model_selection import train_test_split
from collections import Counter

### Read the csv file using pandas
Please note that we are deleting the `url` column since it is of no use to us.

In [2]:
data = pd.read_csv('ign.csv', header = 0)
del data['url']
print(data.head())

   Unnamed: 0 score_phrase                                              title  \
0           0      Amazing                            LittleBigPlanet PS Vita   
1           1      Amazing  LittleBigPlanet PS Vita -- Marvel Super Hero E...   
2           2        Great                               Splice: Tree of Life   
3           3        Great                                             NHL 13   
4           4        Great                                             NHL 13   

           platform  score       genre editors_choice  release_year  \
0  PlayStation Vita    9.0  Platformer              Y          2012   
1  PlayStation Vita    9.0  Platformer              Y          2012   
2              iPad    8.5      Puzzle              N          2012   
3          Xbox 360    8.5      Sports              N          2012   
4     PlayStation 3    8.5      Sports              N          2012   

   release_month  release_day  
0              9           12  
1              9      

### Load all the titles in a list

In [3]:
titles = list(data['title'])
print(titles)



### Represent all the titles in a vector form
To do so, fist we need to remove all the numbers, roman characters and special characters from each title.
After that, split each title into an array of words and app it to a dictionary as key with an integer value. This integer value will be used to represent the position of that word in the vector.
Then, create an `np` array of size `num_of_titles * len(title_dict)`.
Finally, traverse through each title, split them into words, count the occurence of each word and add that count to the corresponding element in the matrix.

In [31]:
feature = list()
title_set = set()
title_dict = dict()
counter = 0
for title in data['title']:
    feature.append(re.sub('[^a-zA-Z ]', '', title).lower())

for title in feature:
    words = title.split(' ')
    for word in words:
        title_set.add(word)
        if not word in title_dict.keys():
            title_dict[word] = counter        
            counter += 1
feature_vector = np.zeros((len(data), len(title_dict.keys())))

for i in range(0, len(feature)):
    words = feature[i].split(' ')
    for word in words:
        count = title.count(word)
        feature_vector[i, int(title_dict[word])] = count
print(feature_vector.shape)
feature_vector

(18625, 7972)


array([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

### Add all the `score_phrase` in a set for uniqueness

In [32]:
score_phrase_set = set()
for phrase in data['score_phrase']:
    score_phrase_set.add(phrase)
print(score_phrase_set)

{'Bad', 'Disaster', 'Great', 'Unbearable', 'Painful', 'Good', 'Mediocre', 'Amazing', 'Masterpiece', 'Okay', 'Awful'}


### Convert all the `score_phrase` into a numerical representation
First we represent all the text in a numerical form. 
Then, to convert a text into a numerical representation, we use TFLearn's `to_categorical` method

In [33]:
target = list()
for score in data['score_phrase']:
    if score == 'Awful':
        target.append(0)
    elif score == 'Unbearable':
        target.append(1)
    elif score == 'Bad':
        target.append(2)
    elif score == 'Disaster':
        target.append(3)
    elif score == 'Painful':
        target.append(4)
    elif score == 'Great':
        target.append(5)
    elif score == 'Masterpiece':
        target.append(6)
    elif score == 'Okay':
        target.append(7)
    elif score == 'Good':
        target.append(8)
    elif score == 'Amazing':
        target.append(9)
    elif score == 'Mediocre':
        target.append(10)
        
target = np.array(to_categorical(target, nb_classes = 11))
target

array([[ 0.,  0.,  0., ...,  0.,  1.,  0.],
       [ 0.,  0.,  0., ...,  0.,  1.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  1.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

### Training and Testing split

In [34]:
trainX, trainY, testX, testY = train_test_split(feature_vector, target, test_size = 0.3)

### Now create a model

In [50]:
# This resets all parameters and variables, leave this here
tf.reset_default_graph()

# Input Layer
net = tflearn.input_data([None, len(title_dict.keys())])

# Hidden Layer
net = tflearn.fully_connected(net, 1000, activation = 'softmax')
net = tflearn.fully_connected(net, 200, activation = 'softmax')
# net = tflearn.fully_connected(net, 20, activation = 'softmax')

# Output Layer
net = tflearn.fully_connected(net, 11, activation = 'softmax')

net = tflearn.regression(net, optimizer = 'sgd', learning_rate = 0.006, loss = 'categorical_crossentropy')

model = tflearn.DNN(net)

### Training the network

In [51]:
model.fit(np.array(feature_vector), np.array(target), validation_set=0.1, show_metric=True, batch_size=32, n_epoch=100)

Training Step: 171  | total loss: [1m[32m2.31822[0m[0m | time: 18.558s
[2K| SGD | epoch: 001 | loss: 2.31822 - acc: 0.2432 -- iter: 05472/16762


KeyboardInterrupt: 