# Analytics Vidhya - JanataHack - NLP

---



## https://satya-python.blogspot.com/

###  To predict reviewer/user recommendation 

#### April 2020

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import re

import warnings
warnings.filterwarnings('ignore')

pd.options.display.max_colwidth = 100
pd.set_option('display.max_columns', 50)

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler,QuantileTransformer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer, HashingVectorizer, TfidfTransformer


  import pandas.util.testing as tm


In [2]:
import nltk
nltk.download("stopwords")

from nltk import word_tokenize
from nltk.corpus import stopwords
stop_words = stopwords.words('english')

def remove_stopwords(string):
    word_list = [word.lower() for word in string.split()]
    stopwords_list = list(stopwords.words("english"))
    for word in word_list:
        if word in stopwords_list:
            word_list.remove(word)
    return ' '.join(word_list)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


# Loading Data

In [3]:
# For Google Colaboratory and Google Drive
from google.colab import files,drive
drive.mount('/content/drive')
%cd /content/drive/'My Drive'/'Analytics Vidhya'/'JanataHack - NLP 2020'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/Analytics Vidhya/JanataHack - NLP 2020


In [4]:
%ls -tlr

total 20377
-rw------- 1 root root  6635220 Apr 17 06:16 test.csv
-rw------- 1 root root 14034581 Apr 17 07:33 train.csv
-rw------- 1 root root   132737 Apr 17 17:43 game_overview.csv
-rw------- 1 root root    61858 Apr 18 02:04 submission.csv


In [0]:
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
test_review_ids = test["review_id"]

# EDA - Exploratory Data Analysis

In [6]:
# Check number of features and data points in train and test
print("Number of data points in train: %d" % train.shape[0])
print("Number of features in train: %d" % train.shape[1])

print("Number of data points in test: %d" % test.shape[0])
print("Number of features in test: %d" % test.shape[1])

Number of data points in train: 17494
Number of features in train: 5
Number of data points in test: 8045
Number of features in test: 4


In [7]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17494 entries, 0 to 17493
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   review_id        17494 non-null  int64  
 1   title            17494 non-null  object 
 2   year             17316 non-null  float64
 3   user_review      17494 non-null  object 
 4   user_suggestion  17494 non-null  int64  
dtypes: float64(1), int64(2), object(2)
memory usage: 683.5+ KB


In [8]:
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8045 entries, 0 to 8044
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   review_id    8045 non-null   int64  
 1   title        8045 non-null   object 
 2   year         7978 non-null   float64
 3   user_review  8045 non-null   object 
dtypes: float64(1), int64(1), object(2)
memory usage: 251.5+ KB


In [9]:
train.head(10)

Unnamed: 0,review_id,title,year,user_review,user_suggestion
0,1,Spooky's Jump Scare Mansion,2016.0,I'm scared and hearing creepy voices. So I'll pause for a moment and write a review while I wai...,1
1,2,Spooky's Jump Scare Mansion,2016.0,"Best game, more better than Sam Pepper's YouTube account. 10/10What you'll need to play:A comput...",1
2,3,Spooky's Jump Scare Mansion,2016.0,"A littly iffy on the controls, but once you know how to play, very easy to master. I've made it ...",1
3,4,Spooky's Jump Scare Mansion,2015.0,"Great game, fun and colorful and all that.A side note, though: When are we getting windowed mode...",1
4,5,Spooky's Jump Scare Mansion,2015.0,Not many games have the cute tag right next to the horror tag on Steam.I first played this game ...,1
5,6,Spooky's Jump Scare Mansion,2015.0,"Early Access ReviewIt's pretty cute at first, but then later gets horrifying and it really does ...",1
6,7,Spooky's Jump Scare Mansion,2017.0,Great game. it's a cute little horror game that progressively gets darker and scarier. It has a ...,1
7,8,Spooky's Jump Scare Mansion,2015.0,Spooky's Jump Scare Mansion is a Free Retro maze game with jump scares and death. It worked on ...,1
8,9,Spooky's Jump Scare Mansion,2015.0,"Somewhere between light hearted, happy parody and being afraid of the dark lies Spooky's House o...",0
9,10,Spooky's Jump Scare Mansion,2015.0,This game with its cute little out of the wall pop-ups that scared the living light out of me I ...,1


In [10]:
test.head(10)

Unnamed: 0,review_id,title,year,user_review
0,1603,Counter-Strike: Global Offensive,2015.0,"Nice graphics, new maps, weapons and models. But developers should listen to the customers a bit..."
1,1604,Counter-Strike: Global Offensive,2018.0,I would not recommend getting into this at its current state. CSGO has hit rock bottom with Valv...
2,1605,Counter-Strike: Global Offensive,2018.0,"Edit 11/12/18I have tried playing CS:GO recently and it has only dramatically gotten worse, now ..."
3,1606,Counter-Strike: Global Offensive,2015.0,"The game is great. But the community is the worst.If you're into a match with a russian, maybe i..."
4,1607,Counter-Strike: Global Offensive,2015.0,I thank TrulyRazor for buying this for me a long time ago -- I insisted to him that I would not ...
5,1608,Counter-Strike: Global Offensive,2014.0,It is kind of hard to say I do not like the game after > 1100 hours of gameplay. It is very addi...
6,1609,Counter-Strike: Global Offensive,2016.0,"Since an old fan of Halo games i find, CS:GO a nice sequel to the franchise. You can jump with p..."
7,1610,Counter-Strike: Global Offensive,2013.0,"Hepimiz küçüklüğümüzde Counter Strike oyunlarından birini muhakkak oynamışızdır, Counter'ın yeni..."
8,1611,Counter-Strike: Global Offensive,2015.0,"Death is a great teacher. Failure in Counter-Strike: Global Offensive is, as it always has been ..."
9,1612,Counter-Strike: Global Offensive,2015.0,"One of the worst communities in gaming. Griefers, cheaters, squeaky kids, flamers... Theres ever..."


In [11]:
train.nunique()

review_id          17494
title                 44
year                   8
user_review        17490
user_suggestion        2
dtype: int64

In [12]:
# Checking for NULL/missing values
train.isnull().sum().sort_values(ascending=False).nlargest(20)

year               178
user_suggestion      0
user_review          0
title                0
review_id            0
dtype: int64

In [13]:
# Checking for NULL/missing values
test.isnull().sum().sort_values(ascending=False).nlargest(20)

year           67
user_review     0
title           0
review_id       0
dtype: int64

In [14]:
train.title.value_counts().sort_values(ascending=False)

Robocraft                                             842
Eternal Card Game                                     791
Heroes & Generals                                     745
War Thunder                                           720
Fractured Space                                       718
Bless Online                                          712
The Elder Scrolls®: Legends™                          565
Neverwinter                                           546
AdventureQuest 3D                                     519
theHunter Classic                                     518
Creativerse                                           492
DCS World Steam Edition                               488
Infestation: The New Z                                479
Team Fortress 2                                       479
PlanetSide 2                                          472
Path of Exile                                         458
SMITE®                                                454
Fallout Shelte

In [15]:
train.year.value_counts().sort_values(ascending=False)

2018.0    4822
2016.0    4226
2017.0    3890
2015.0    2460
2014.0    1499
2013.0     340
2012.0      65
2011.0      14
Name: year, dtype: int64

In [16]:
train.user_suggestion.value_counts().sort_values()

0    7526
1    9968
Name: user_suggestion, dtype: int64

# Preparing Data

In [0]:
train['title'] = train['title'].astype(str)
test['title'] = test['title'].astype(str)
train['title'] = train['title'].str.lower()
test['title'] = test['title'].str.lower()

train['user_review'] = train['user_review'].astype(str)
test['user_review'] = test['user_review'].astype(str)
train['user_review'] = train['user_review'].str.lower()
test['user_review'] = test['user_review'].str.lower()

In [18]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17494 entries, 0 to 17493
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   review_id        17494 non-null  int64  
 1   title            17494 non-null  object 
 2   year             17316 non-null  float64
 3   user_review      17494 non-null  object 
 4   user_suggestion  17494 non-null  int64  
dtypes: float64(1), int64(2), object(2)
memory usage: 683.5+ KB


In [19]:
train.head()

Unnamed: 0,review_id,title,year,user_review,user_suggestion
0,1,spooky's jump scare mansion,2016.0,i'm scared and hearing creepy voices. so i'll pause for a moment and write a review while i wai...,1
1,2,spooky's jump scare mansion,2016.0,"best game, more better than sam pepper's youtube account. 10/10what you'll need to play:a comput...",1
2,3,spooky's jump scare mansion,2016.0,"a littly iffy on the controls, but once you know how to play, very easy to master. i've made it ...",1
3,4,spooky's jump scare mansion,2015.0,"great game, fun and colorful and all that.a side note, though: when are we getting windowed mode...",1
4,5,spooky's jump scare mansion,2015.0,not many games have the cute tag right next to the horror tag on steam.i first played this game ...,1


In [20]:
train['review_length'] = train['user_review'].apply(len)
train['user_review'] = train['user_review'].str.lower()

train['user_review'] = train['user_review'].map(lambda x: re.sub('\\n',' ',str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r'\W',' ',str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r'https\s+|www.\s+',r'', str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r'http\s+|www.\s+',r'', str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r'\s+[a-zA-Z]\s+',' ',str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r'\^[a-zA-Z]\s+',' ',str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r'\s+',' ',str(x)))

train['user_review'] = train['user_review'].map(lambda x: re.sub(r"\’", "\'", str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r"won\'t", "will not", str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r"can\'t", "can not", str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r"don\'t", "do not", str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r"dont", "do not", str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r"n\’t", " not", str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r"n\'t", " not", str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r"\'re", " are", str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r"\'s", " is", str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r"\’d", " would", str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r"\d", " would", str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r"\'ll", " will", str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r"\'t", " not", str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r"\'ve", " have", str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r"\'m", " am", str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r"\n", "", str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r"\r", "", str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r"[0-9]", "digit", str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r"\'", "", str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r"\"", "", str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r'[?|!|\'|"|#]',r'', str(x)))
train['user_review'] = train['user_review'].map(lambda x: re.sub(r'[.|,|)|(|\|/]',r' ', str(x)))
train['user_review'] = train['user_review'].apply(lambda x: remove_stopwords(x))
train.head(10)

Unnamed: 0,review_id,title,year,user_review,user_suggestion,review_length
0,1,spooky's jump scare mansion,2016.0,scared hearing creepy voices pause moment write review wait heart beat return atleast somewhat c...,1,710
1,2,spooky's jump scare mansion,2016.0,best game better sam pepper youtube account would would would wouldwhat ll need play computersom...,1,335
2,3,spooky's jump scare mansion,2016.0,littly iffy the controls know play easy master made floor would would would would but due to cer...,1,397
3,4,spooky's jump scare mansion,2015.0,great game fun colorful all side note though are getting windowed mode computer hates fullscreen...,1,280
4,5,spooky's jump scare mansion,2015.0,many games cute tag right next horror tag steam first played game late would would would would c...,1,334
5,6,spooky's jump scare mansion,2015.0,early access reviewit pretty cute first then later gets horrifying it really jumpscare specimen ...,1,209
6,7,spooky's jump scare mansion,2017.0,great game a cute little horror game progressively gets darker scarier has sense humor ve played...,1,198
7,8,spooky's jump scare mansion,2015.0,spooky jump scare mansion free retro maze game jump scares death worked win would would looked o...,1,917
8,9,spooky's jump scare mansion,2015.0,somewhere light hearted happy parody afraid dark lies spooky house jump scares tasked daunting d...,0,5217
9,10,spooky's jump scare mansion,2015.0,game its cute little of wall pop ups scared living light of like much played lately this very go...,1,304


In [21]:
test['review_length'] = test['user_review'].apply(len)
test['user_review'] = test['user_review'].str.lower()

test['user_review'] = test['user_review'].map(lambda x: re.sub('\\n',' ',str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r'\W',' ',str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r'https\s+|www.\s+',r'', str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r'http\s+|www.\s+',r'', str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r'\s+[a-zA-Z]\s+',' ',str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r'\^[a-zA-Z]\s+',' ',str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r'\s+',' ',str(x)))

test['user_review'] = test['user_review'].map(lambda x: re.sub(r"\’", "\'", str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r"won\'t", "will not", str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r"can\'t", "can not", str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r"don\'t", "do not", str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r"dont", "do not", str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r"n\’t", " not", str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r"n\'t", " not", str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r"\'re", " are", str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r"\'s", " is", str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r"\’d", " would", str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r"\d", " would", str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r"\'ll", " will", str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r"\'t", " not", str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r"\'ve", " have", str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r"\'m", " am", str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r"\n", "", str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r"\r", "", str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r"[0-9]", "digit", str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r"\'", "", str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r"\"", "", str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r'[?|!|\'|"|#]',r'', str(x)))
test['user_review'] = test['user_review'].map(lambda x: re.sub(r'[.|,|)|(|\|/]',r' ', str(x)))
test['user_review'] = test['user_review'].apply(lambda x: remove_stopwords(x))
test.head(10)

Unnamed: 0,review_id,title,year,user_review,review_length
0,1603,counter-strike: global offensive,2015.0,nice graphics new maps weapons models developers listen customers bit developers focused much th...,433
1,1604,counter-strike: global offensive,2018.0,would recommend getting its current state csgo hit rock bottom valve missed promises game changi...,944
2,1605,counter-strike: global offensive,2018.0,edit would would would would would wouldi tried playing cs go recently only dramatically gotten ...,3048
3,1606,counter-strike: global offensive,2015.0,game great community worst into match russian maybe fine the times but there bunch of them expec...,377
4,1607,counter-strike: global offensive,2015.0,thank trulyrazor buying long time ago insisted him would play seems wrong around always known pe...,5666
5,1608,counter-strike: global offensive,2014.0,kind hard say like game would would would would hours gameplay addictive love game fact skill ma...,2804
6,1609,counter-strike: global offensive,2016.0,since old fan halo games find cs go nice sequel franchise can jump p would would awp ak make hea...,677
7,1610,counter-strike: global offensive,2013.0,hepimiz küçüklüğümüzde counter strike oyunlarından birini muhakkak oynamışızdır counter ın yenil...,997
8,1611,counter-strike: global offensive,2015.0,death great teacher failure counter strike global offensive always been series greatest way lear...,5292
9,1612,counter-strike: global offensive,2015.0,one worst communities gaming griefers cheaters squeaky kids flamers theres every type could poss...,1544


In [0]:
target = train["user_suggestion"]
train2 = train.drop(['user_suggestion'], axis=1)

x_train, x_val, y_train, y_val = train_test_split(train2, target, test_size=0.3, random_state=2019)

In [23]:
train2.head()

Unnamed: 0,review_id,title,year,user_review,review_length
0,1,spooky's jump scare mansion,2016.0,scared hearing creepy voices pause moment write review wait heart beat return atleast somewhat c...,710
1,2,spooky's jump scare mansion,2016.0,best game better sam pepper youtube account would would would wouldwhat ll need play computersom...,335
2,3,spooky's jump scare mansion,2016.0,littly iffy the controls know play easy master made floor would would would would but due to cer...,397
3,4,spooky's jump scare mansion,2015.0,great game fun colorful all side note though are getting windowed mode computer hates fullscreen...,280
4,5,spooky's jump scare mansion,2015.0,many games cute tag right next horror tag steam first played game late would would would would c...,334


In [24]:
test.head()

Unnamed: 0,review_id,title,year,user_review,review_length
0,1603,counter-strike: global offensive,2015.0,nice graphics new maps weapons models developers listen customers bit developers focused much th...,433
1,1604,counter-strike: global offensive,2018.0,would recommend getting its current state csgo hit rock bottom valve missed promises game changi...,944
2,1605,counter-strike: global offensive,2018.0,edit would would would would would wouldi tried playing cs go recently only dramatically gotten ...,3048
3,1606,counter-strike: global offensive,2015.0,game great community worst into match russian maybe fine the times but there bunch of them expec...,377
4,1607,counter-strike: global offensive,2015.0,thank trulyrazor buying long time ago insisted him would play seems wrong around always known pe...,5666


In [0]:
# Convert all the text into vector form

vec = CountVectorizer()
#vec = CountVectorizer(stop_words='english', binary=True)
#vec = CountVectorizer(stop_words='english', ngram_range=(1,10), analyzer='char', token_pattern=r'\w{1,}', min_df=4)
#vec = CountVectorizer(stop_words='english', ngram_range=(1,4), analyzer='char', min_df=4)
#vec = CountVectorizer(analyzer='word',token_pattern=r'\w{1,}', ngram_range=(1, 4), stop_words = 'english')

#vec = TfidfVectorizer(stop_words='english')
#vec = TfidfVectorizer(stop_words='english', sublinear_tf=True, strip_accents='unicode', analyzer='char', token_pattern=r'\w{1,}', ngram_range=(1, 9), max_features=2000000)
#vec = TfidfVectorizer(ngram_range=(1,5), min_df=3, stop_words='english', max_df=0.9, strip_accents='unicode', use_idf=1, smooth_idf=1, sublinear_tf=1 )
#vec = TfidfVectorizer(min_df=3, max_features=None, strip_accents='unicode', analyzer='word',token_pattern=r'\w{1,}', ngram_range=(1, 4), use_idf=1,smooth_idf=1,sublinear_tf=1, stop_words = 'english')

#vec = HashingVectorizer()

In [26]:
#X_train_counts = vec.fit_transform(x_train['user_review'])
X_train_counts = vec.fit_transform(train['user_review'])

# TF-IDF
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)
X_train_tfidf.shape

#(32165, 54566)

(17494, 51616)

In [27]:
X_val_counts = vec.transform(x_val['user_review'])
X_val_tfidf = tfidf_transformer.transform(X_val_counts)
print(X_val_tfidf.shape)

(5249, 51616)


In [28]:
test_counts = vec.transform(test['user_review'])
test_tfidf = tfidf_transformer.transform(test_counts)
print(test_tfidf.shape)

(8045, 51616)


# Machine Learning Algorithms

# Logistic Regression

In [29]:
lnreg = LogisticRegression()

lnreg.fit(X_train_tfidf, target)
#lnreg.fit(x_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [30]:
y_pred_ln = lnreg.predict(X_val_tfidf)
y_pred_ln

array([1, 0, 1, ..., 1, 1, 1])

In [31]:
score = np.sqrt(mean_squared_error(y_val, y_pred_ln))
print('RMSE score: %.2f' % score)
print('Variance score: %.2f' % r2_score(y_val, y_pred_ln))
# RMSE score: 0.30
# Variance score: 0.62

RMSE score: 0.30
Variance score: 0.62


In [32]:
y_pred_ln = lnreg.predict(test_tfidf)
y_pred_ln

array([0, 0, 0, ..., 0, 1, 1])

In [33]:
submission = pd.DataFrame(data = {"review_id":test_review_ids, "user_suggestion":y_pred_ln})
submission.to_csv("submission.csv", index=False)
print(submission['user_suggestion'].value_counts())

1    4753
0    3292
Name: user_suggestion, dtype: int64


# Random Forest

In [34]:
rf = RandomForestClassifier(n_estimators=200, random_state = 2020, verbose=1).fit(X_train_tfidf, target)
#rf = RandomForestClassifier(n_estimators=100, random_state = 2020, verbose=1).fit(X_train_tfidf, y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 200 out of 200 | elapsed:  1.2min finished


In [35]:
y_pred_rf = rf.predict(X_val_tfidf)
y_pred_rf

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 200 out of 200 | elapsed:    0.7s finished


array([1, 0, 1, ..., 1, 1, 1])

In [36]:
score=rf.score(X_train_tfidf, target)
#score=rf.score(X_train_tfidf, y_train)
score

# 0.9928546930376129

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 200 out of 200 | elapsed:    2.2s finished


0.9998285126329027

In [37]:
y_pred_rf = rf.predict(test_tfidf)
y_pred_rf

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 200 out of 200 | elapsed:    1.1s finished


array([1, 0, 0, ..., 1, 1, 0])

In [38]:
submission = pd.DataFrame(data = {"review_id":test_review_ids, "user_suggestion":y_pred_rf})
submission.to_csv("submission.csv", index=False)
print(submission['user_suggestion'].value_counts())

1    5384
0    2661
Name: user_suggestion, dtype: int64


In [0]:
!pwd; ls -ltr
files.download('submission.csv')

/content/drive/My Drive/Analytics Vidhya/JanataHack - NLP 2020
total 20377
-rw------- 1 root root  6635220 Apr 17 06:16 test.csv
-rw------- 1 root root 14034581 Apr 17 07:33 train.csv
-rw------- 1 root root   132737 Apr 17 17:43 game_overview.csv
-rw------- 1 root root    61858 Apr 18 02:10 submission.csv
