**`Desarrollo API`**:   Propones disponibilizar los datos de la empresa usando el framework ***FastAPI***. Las consultas que propones son las siguientes:

<sub> Debes crear las siguientes funciones para los endpoints que se consumirán en la API, recuerden que deben tener un decorador por cada una (@app.get(‘/’)).<sub/>

+ def **userdata( *`User_id` : str* )**:
    Debe devolver `cantidad` de dinero gastado por el usuario, el `porcentaje` de recomendación en base a reviews.recommend y `cantidad de items`.

+ def **countreviews( *`YYYY-MM-DD` y `YYYY-MM-DD` : str* )**:
    `Cantidad de usuarios` que realizaron reviews entre las fechas dadas y, el `porcentaje` de recomendación de los mismos en base a reviews.recommend.

+ def **genre( *`género` : str* )**:
    Devuelve el `puesto` en el que se encuentra un género sobre el ranking de los mismos analizado bajo la columna PlayTimeForever. 

In [88]:
import pandas as pd

columnas = ['item_id','playtime_forever']
df = pd.read_csv(r'./datasets/aus_users_items.csv',usecols=columnas)
df.head(3)

Unnamed: 0,item_id,playtime_forever
0,10,6
1,20,0
2,30,7


In [89]:
df_playhours = pd.DataFrame(df.groupby('item_id')['playtime_forever'].agg('sum')).reset_index()
df_playhours.head()

Unnamed: 0,item_id,playtime_forever
0,10,17386015
1,20,961702
2,30,758991
3,40,154486
4,50,734562


In [90]:
columnas = ['id','genres']
df = pd.read_csv(r'./datasets/steam_games.csv',usecols=columnas)
df.rename(columns={'id':'item_id'},inplace=True)
df_merge = df_playhours.merge(df,on='item_id',how='left')
df_merge = df_merge[df_merge['playtime_forever'] != 0]
df_merge = df_merge.dropna(subset=['genres'])
# df_merge['genres'] = df_merge['genres'].astype(list)
# df_merge.sample(20)
import ast

def tolist(lst):
    text= ""
    if isinstance(lst, str) :
        lst = lst.replace("0's",'0s')
        lst =  ast.literal_eval(lst)
        # try:
        #     lst =  ast.literal_eval(lst)
        # except (SyntaxError, ValueError):
        #     return lst.lower()

    return list(lst)

df_merge['genres'] = df_merge['genres'].apply(lambda x : tolist(x))

In [91]:
def create_genre_columns(dataframe):
    genres_list = set()
    for genres in dataframe['genres']:
        if isinstance(genres, list):
            genres_list.update(genres)
    
    for genre in genres_list:
        dataframe[genre] = dataframe['genres'].apply(lambda x: 1 if genre in x else 0)
    
    return dataframe

df_with_genre_columns = create_genre_columns(df_merge)
# df_with_genre_columns.drop('genres',axis=1).to_csv('holatest.csv')

In [92]:
genre_columns_mapping = {
    'Racing': 'Racing',
    'Casual': 'Casual',
    'Sports': 'Sports',
    'Indie': 'Indie',
    'Web Publishing': 'Web Publishing',
    'Free to Play': 'Free to Play',
    'Software Training': 'Software Training',
    'RPG': 'RPG',
    'Early Access': 'Early Access',
    'Animation &amp; Modeling': 'Animation &amp; Modeling',
    'Strategy': 'Strategy',
    'Audio Production': 'Audio Production',
    'Action': 'Action',
    'Photo Editing': 'Photo Editing',
    'Massively Multiplayer': 'Massively Multiplayer',
    'Adventure': 'Adventure',
    'Education': 'Education',
    'Video Production': 'Video Production',
    'Simulation': 'Simulation',
    'Utilities': 'Utilities',
    'Design &amp; Illustration': 'Design &amp; Illustration'
}

# Inicializar un diccionario para almacenar las sumas de horas por género
genre_hours_sum = {}

# Iterar a través del diccionario de mapeo de columnas y calcular las sumas de horas
for col_bin, genre in genre_columns_mapping.items():
    if col_bin in df_with_genre_columns.columns:
        genre_hours_sum[genre] = df_with_genre_columns[df_with_genre_columns[col_bin] == 1]['playtime_forever'].sum()

# Crear un DataFrame con las sumas de horas por género
genre_summary_df = pd.DataFrame.from_dict(genre_hours_sum, orient='index', columns=['Total_Hours'])

# Agregar una columna para el ranking de géneros más jugados
genre_summary_df['Rank'] = genre_summary_df['Total_Hours'].rank(ascending=False).astype(int)

# Ordenar el DataFrame por las horas totales en orden descendente
genre_summary_df = genre_summary_df.sort_values(by='Total_Hours', ascending=False)

# Agregar el nombre "Genre" a la primera columna del índice
genre_summary_df = genre_summary_df.rename_axis('Genre')

# Mostrar el DataFrame con el ranking y las sumas de horas por género
genre_summary_df.to_csv(r'./dataquery/gener_rank.csv')


In [93]:
df_genre = pd.read_csv(r'./dataquery/gener_rank.csv')
df_genre['Genre'] = df_genre['Genre'].apply(lambda x: x.lower())
df_genre.to_csv(r'./dataquery/gener_rank.csv',index=False)

In [94]:
df_genre.head(3)

Unnamed: 0,Genre,Total_Hours,Rank
0,action,3113562606,1
1,indie,1494622404,2
2,rpg,1041022718,3


In [104]:
import json

def get_genre_info(genre):
    genre = genre.lower()
    df_genre = pd.read_csv(r'./dataquery/gener_rank.csv')
    
    if df_genre['Genre'].str.contains(genre).any():
        genre_info = df_genre[df_genre['Genre']==genre]
    else:
        return 'No se encontro el genero'
    return json.dumps(genre_info.to_json(orient='records'), indent=4)

# Prueba la función con un género
genre_to_check = 'indie'
genre_info_json = get_genre_info(genre_to_check)
print(genre_info_json)

"[{\"Genre\":\"indie\",\"Total_Hours\":1494622404,\"Rank\":2}]"


# def userforgenre(gener : str)
```python
def userforgenre( genero : str ):
    # Top 5 de usuarios con más horas de juego en el género dado, con su URL (del user) y user_id.
```

In [57]:
# Crear una tabla que me de las horas jugadas por genero de cada usuario
import pandas as pd
import ast

columnas = ['id','genres'] # Columnas a importar
df = pd.read_csv(r'./datasets/steam_games.csv',usecols=columnas)
df.rename(columns={'id':'item_id'},inplace=True)
df = df.dropna(subset=['genres']) # removemos los items que no tienen un genero definido

In [58]:
# Funcion cambia de string a formato list los objetos
def tolist(lst):
    text= ""
    if isinstance(lst, str) :
        lst = lst.replace("0's",'0s')
        lst =  ast.literal_eval(lst)

    return list(lst)
# Mapeamos la columna con la funcion creada
df['genres'] = df['genres'].apply(lambda x : tolist(x))


In [59]:
# creamos las columnas binarias de los géneros
def create_genre_columns(dataframe):
    genres_list = set()

    # se crea el set con los valores DISTINCT de los géneros
    for genres in dataframe['genres']:
        if isinstance(genres, list):
            genres_list.update(genres)
    
    # crea una columna por cada genre en genre_list 
    # le aplica luego se recore la columna genre imputando 1 cuando existe ese genero 
    # en la fila analizada
    for genre in genres_list:
        dataframe[genre] = dataframe['genres'].apply(lambda x: 1 if genre in x else 0)
    
    return dataframe

df_binario = create_genre_columns(df)
df_binario.drop('genres',axis=1,inplace=True)

In [60]:
#importamos el csv de los items por usuario
columnas = ['user_id','user_url','item_id','playtime_forever'] # Columnas a importar
df_users = pd.read_csv(r'./datasets/aus_users_items.csv',usecols=columnas)

# Merge 
df_users = df_users.merge(df_binario,on='item_id',how='left')
df_users.fillna(0,inplace=True)

# 

In [80]:
col_sum = [ 'Indie','Racing','Sports','Audio Production','Animation &amp; Modeling',
            'Video Production','Software Training','Casual','Simulation','Utilities',
            'Photo Editing','Early Access','Accounting','Education',
            'Massively Multiplayer','Action','Web Publishing',
            'Adventure','Strategy','RPG','Design &amp; Illustration','Free to Play']
for col_name in col_sum:
    df_users[col_name] = df_users['playtime_forever'] * df_users[col_name]
df_users_group = df_users.groupby(['user_id','user_url'])[col_sum].agg('sum').reset_index()
df_users_group.columns = df_users_group.columns.str.lower()
display(df_users_group.head(3))
df_users_group.to_csv(r'./dataquery/top5_users.csv',index=False)

Unnamed: 0,user_id,user_url,indie,racing,sports,audio production,animation &amp; modeling,video production,software training,casual,...,accounting,education,massively multiplayer,action,web publishing,adventure,strategy,rpg,design &amp; illustration,free to play
0,--000--,http://steamcommunity.com/id/--000--,285597300000000.0,0.0,916439400000000.0,0.0,0.0,0.0,0.0,1517954000000000.0,...,0.0,0.0,1304011000000000.0,1.316654e+20,0.0,603445700000000.0,45621650000.0,337727500000000.0,0.0,1580403000000000.0
1,--ace--,http://steamcommunity.com/id/--ace--,79390360000000.0,1296.0,0.0,0.0,0.0,0.0,0.0,76796850000000.0,...,0.0,0.0,0.0,1.422102e+19,0.0,79289500000000.0,2441525000000.0,2593277000000.0,0.0,79238670000000.0
2,--ionex--,http://steamcommunity.com/id/--ionex--,698443900000.0,0.0,0.0,0.0,0.0,0.0,0.0,9622141000.0,...,0.0,0.0,5908721000.0,1.584297e+18,0.0,542176400000.0,2300259000.0,510838900000.0,0.0,201701200000.0


In [87]:
def genre_top5_users(text : str):
    text = text.lower().strip()
    df1 = pd.read_csv(r'./dataquery/top5_users.csv')
    if text not in df1.columns:
        return "Genre not found"
    
    top5 = df1.sort_values(by=text,ascending=False).head(5).reset_index()
    response = []
    for i in range (0 , 5):
        user_id = top5['user_id'].loc[i]
        user_url = top5['user_url'].loc[i]
        item_dict = {"user_id":user_id,
                     "user_url":user_url}
        response.append(item_dict)
    return response

genre_top5_users('rpg ')

[{'user_id': 'Evilutional',
  'user_url': 'http://steamcommunity.com/id/Evilutional'},
 {'user_id': 'shinomegami',
  'user_url': 'http://steamcommunity.com/id/shinomegami'},
 {'user_id': 'thiefofrosesinlalaland',
  'user_url': 'http://steamcommunity.com/id/thiefofrosesinlalaland'},
 {'user_id': '76561198027406146',
  'user_url': 'http://steamcommunity.com/profiles/76561198027406146'},
 {'user_id': 'KingXRay', 'user_url': 'http://steamcommunity.com/id/KingXRay'}]

'--ace--'

Index(['user_id', 'user_url', 'item_id', 'playtime_forever', 'Photo Editing',
       'Utilities', 'Casual', 'Sports', 'Early Access', 'Accounting',
       'Simulation', 'Indie', 'Education', 'Design &amp; Illustration',
       'Free to Play', 'Animation &amp; Modeling', 'Adventure', 'Strategy',
       'Web Publishing', 'Racing', 'RPG', 'Action', 'Audio Production',
       'Software Training', 'Video Production', 'Massively Multiplayer'],
      dtype='object')

+ def **developer( *`desarrollador` : str* )**:
    `Cantidad` de items y `porcentaje` de contenido Free por año según empresa desarrolladora. 
Ejemplo de salida:
    | Activision ||
    |----------|----------|
    | Año  | Contenido Free  |
    | 2023   | 27% |
    | 2022    | 25%   |
    | xxxx    | xx%   |

+ def **sentiment_analysis( *`año` : int* )**:
    Según el año de lanzamiento, se devuelve una lista con la cantidad de registros de reseñas de usuarios que se encuentren categorizados con un análisis de sentimiento. 

    &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;Ejemplo de retorno: *{Negative = 182, Neutral = 120, Positive = 278}*