# Modelo NLP

Se usa una función que mide la polaridad del texto de cada reseña para generar una nueva columna **'sentiment'**.

In [1]:
import pandas as pd
from textblob import TextBlob

In [3]:
# Load the input CSV.
# A forward slash is used in the path: '\d' is an invalid escape sequence in a normal
# string literal, and a backslash separator only works on Windows.
df_NLPM_v1 = pd.read_csv('Data/data_NLPM_1.csv')

In [4]:
# Preview the first rows to confirm the CSV was loaded as expected
df_NLPM_v1.head()

Unnamed: 0,posted,item_id,recommend,review,user_id
0,2011,1250,True,Simple yet with great replayability. In my opi...,76561197970982479
1,2011,22200,True,It's unique and worth a playthrough.,76561197970982479
2,2011,43110,True,Great atmosphere. The gunplay can be a bit chu...,76561197970982479
3,2014,251610,True,I know what you think when you see this title ...,js41637
4,2013,227300,True,For a simple (it's actually not all that simpl...,js41637


In [72]:
# Replace nulls in the 'review' column with empty strings ('')
# NOTE(review): presumably so that every row holds a string before sentiment analysis - confirm
df_NLPM_v1['review'] = df_NLPM_v1['review'].fillna('')

In [73]:
# Work on an independent copy so df_NLPM_v1 is left untouched
df_NLPM_v2 = df_NLPM_v1.copy()

# Cast 'review' to str. astype returns a new Series, so the result must be assigned
# back; calling it without assignment leaves the column unchanged.
df_NLPM_v2['review'] = df_NLPM_v2['review'].astype(str)

# Display the normalized column
df_NLPM_v2['review']

0        Simple yet with great replayability. In my opi...
1                     It's unique and worth a playthrough.
2        Great atmosphere. The gunplay can be a bit chu...
3        I know what you think when you see this title ...
4        For a simple (it's actually not all that simpl...
                               ...                        
49181                                       its FUNNNNNNNN
49182    Awesome fantasy game if you don't mind the gra...
49183                                     Prettyy Mad Game
49184                                   AMAZING GAME 10/10
49185    Why I voted yes? 1. Girl characters have boobs...
Name: review, Length: 49186, dtype: object

In [74]:
# Sentiment classifier based on the polarity computed by TextBlob
def analizar_sentimiento(text):
    """Return the sentiment class of ``text`` from its TextBlob polarity.

    Args:
        text: The text to analyze; it is passed directly to ``TextBlob``.

    Returns:
        1 when the polarity is greater than zero (positive), -1 when it is
        less than zero (negative), and None otherwise (neutral).
    """
    score = TextBlob(text).sentiment.polarity
    if score > 0:
        return 1  # Positive
    if score < 0:
        return -1  # Negative
    return None  # Neutral

In [75]:
# Apply analizar_sentimiento to every value in 'review' and store the result in a new 'sentiment' column
df_NLPM_v2['sentiment'] = df_NLPM_v2['review'].apply(analizar_sentimiento)

# Preview the first rows to check the new column
df_NLPM_v2.head()

Unnamed: 0,posted,item_id,recommend,review,user_id,sentiment
0,2011,1250,True,Simple yet with great replayability. In my opi...,76561197970982479,1.0
1,2011,22200,True,It's unique and worth a playthrough.,76561197970982479,1.0
2,2011,43110,True,Great atmosphere. The gunplay can be a bit chu...,76561197970982479,1.0
3,2014,251610,True,I know what you think when you see this title ...,js41637,1.0
4,2013,227300,True,For a simple (it's actually not all that simpl...,js41637,-1.0


In [76]:
# Check the row count and the number of nulls per column.
# Nulls in 'sentiment' correspond to reviews for which analizar_sentimiento returned None (neutral).
print(len(df_NLPM_v2))
df_NLPM_v2.isnull().sum()

49186


posted           0
item_id          0
recommend        0
review           0
user_id          0
sentiment    10701
dtype: int64

In [77]:
# Work on a copy so that df_NLPM_v2 is not modified by the following steps
df_NLPM_v3 = df_NLPM_v2.copy()

In [78]:
# Neutral reviews were left as nulls in 'sentiment'; replace them with 0.
# The result is assigned back instead of calling fillna(inplace=True) on the selected
# column: the in-place form operates on an intermediate Series and, under pandas
# Copy-on-Write, does not update the DataFrame.
df_NLPM_v3['sentiment'] = df_NLPM_v3['sentiment'].fillna(0)

In [79]:
# 'review' is no longer needed once 'sentiment' has been derived from it, so drop that column
df_NLPM_v3 = df_NLPM_v3.drop(columns=['review'])

In [80]:
# Convert the boolean 'recommend' column to integers (True -> 1, False -> 0).
# An explicit astype(int) is used instead of replace({True: 1, False: 0}), which
# depends on implicit dtype downcasting of the replaced values (deprecated in
# recent pandas versions).
df_NLPM_v3['recommend'] = df_NLPM_v3['recommend'].astype(int)

In [81]:
# Check the row count and confirm that no column has nulls left
print(len(df_NLPM_v3))
df_NLPM_v3.isnull().sum()

49186


posted       0
item_id      0
recommend    0
user_id      0
sentiment    0
dtype: int64

In [84]:
# Preview the first rows of the transformed DataFrame
df_NLPM_v3.head()

Unnamed: 0,posted,item_id,recommend,user_id,sentiment
0,2011,1250,1,76561197970982479,1.0
1,2011,22200,1,76561197970982479,1.0
2,2011,43110,1,76561197970982479,1.0
3,2014,251610,1,js41637,1.0
4,2013,227300,1,js41637,-1.0


In [83]:
# Cast 'recommend' and 'sentiment' to integers.
# astype returns a new Series, so each result is assigned back; without the
# assignment the casts are discarded and 'sentiment' stays float (1.0 / -1.0 / 0.0).
df_NLPM_v3['recommend'] = df_NLPM_v3['recommend'].astype(int)
df_NLPM_v3['sentiment'] = df_NLPM_v3['sentiment'].astype(int)

# Display the converted column
df_NLPM_v3['sentiment']

0        1
1        1
2        1
3        1
4       -1
        ..
49181    0
49182    1
49183   -1
49184    1
49185    1
Name: sentiment, Length: 49186, dtype: int32

In [85]:
# Export the result to CSV without writing the index.
# A forward slash is used in the path: '\d' is an invalid escape sequence in a normal
# string literal, and a backslash separator only works on Windows.
df_NLPM_v3.to_csv('Data/data_NLPM_2.csv', index=False)