In [1]:
import pandas as pd

from typing import Dict
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the pre-computed CSV tables that the query functions in this notebook read
# as module-level globals. Paths are relative to the current working directory.

# Read by PlayTimeGenre (columns used there: genres, playtime_forever, release_year).
df_playtime_genre = pd.read_csv('data/csv/playtime_genre.csv', encoding='utf-8')
# Read by UserForGenre (columns used there: genres, user_id, playtime_forever, release_year).
df_user_for_genre = pd.read_csv('data/csv/user_for_genre.csv', encoding='utf-8')
# Read by UsersRecommend and UsersNotRecommend
# (columns used there: posted, recommend, sentiment_analysis, item_name).
df_user_recommend = pd.read_csv('data/csv/user_recommend.csv', encoding='utf-8')
# Read by sentiment_analysis (columns used there: release_year, sentiment_analysis).
df_sentiment_year = pd.read_csv('data/csv/sentiment_year.csv', encoding='utf-8')

# Play Time Genre
This function receives a genre as a parameter and returns the release year with the most hours played for that genre.

In [3]:
def PlayTimeGenre(genre: str):
    """
    Returns the release year with the most hours played for the given genre.

    The genre is matched exactly (case-sensitive) against the 'genres' column of
    the module-level ``df_playtime_genre`` table.

    NOTE(review): a second ``PlayTimeGenre`` is defined in a later cell of this
    notebook and replaces this one as soon as that cell runs; consider keeping
    only one of the two definitions.

    Parameters:
    - genre (str): Genre for which you want to obtain the year with the most hours played.

    Returns:
    - dict: Single-entry dictionary. The key is a descriptive message containing
      ``genre``; the value is the 'release_year' of the row with the highest
      'playtime_forever', or None when the genre has no usable rows.
    """
    # Filter the DataFrame by the given genre (exact match)
    genre_df = df_playtime_genre[df_playtime_genre['genres'] == genre]

    # idxmax() cannot be resolved on an empty or all-missing selection, so an
    # unknown genre is reported as "no data" instead of raising.
    playtime = genre_df['playtime_forever'].dropna()
    if playtime.empty:
        return {f"There is no data available for the genre {genre}": None}

    # Look up the release year on the row holding the largest playtime
    max_playtime_year = genre_df.loc[playtime.idxmax(), 'release_year']

    return {f"Release year with the most hours played for the genre {genre}": max_playtime_year}



In [4]:
def PlayTimeGenre(genre: str):
    """
    Returns the release year with the most hours played for the given genre.

    The genre is matched case-insensitively against the 'genres' column of the
    module-level ``df_playtime_genre`` table.

    Parameters:
    - genre (str): Genre for which you want to obtain the release year with the most hours played.

    Returns:
    - dict: Single-entry dictionary. The key is a descriptive message containing
      ``genre`` as it was passed in; the value is the 'release_year' of the row
      with the highest 'playtime_forever', or None when no row matches the genre
      or none of the matching rows has a playtime value.
    """
    # Key returned when no row matches. Kept byte-for-byte from the previous
    # implementation because callers may already match on it.
    no_rows_key = f"No hay datos disponibles para el género {genre}"

    # A non-string genre can never match a row
    if not isinstance(genre, str):
        return {no_rows_key: None}

    # Filter the DataFrame by the given genre, ignoring upper/lower case
    genre_df = df_playtime_genre[df_playtime_genre['genres'].str.lower() == genre.lower()]

    if genre_df.empty:
        return {no_rows_key: None}

    # Rows exist but every playtime may be missing; idxmax() has no valid answer
    # in that case, so drop the missing values and check what is left.
    playtime = genre_df['playtime_forever'].dropna()
    if playtime.empty:
        return {f"There is no data available for the genre {genre}": None}

    # Look up the release year on the row holding the largest playtime
    max_playtime_year = genre_df.loc[playtime.idxmax(), 'release_year']

    return {f"Release year with the most hours played for the genre {genre}": max_playtime_year}


In [5]:
PlayTimeGenre('Action')

{'Release year with the most hours played for the genre Action': '2012'}

# User for genre

In [6]:
def UserForGenre(genre: str):
    """
    Returns the user with the most hours played for the given genre and the hours played per year for that user.

    The genre is matched case-insensitively against the 'genres' column of the
    module-level ``df_user_for_genre`` table.

    Parameters:
    - genre (str): Genre for which you want to obtain the user and the accumulation of hours played.

    Returns:
    - dict: Dictionary with two entries:
        * "User with the most hours played for the genre <genre>": the 'user_id'
          whose summed 'playtime_forever' is the largest for the genre.
        * "Hours played per year": mapping of 'release_year' to that user's
          summed 'playtime_forever', in order of first appearance in the table.
      When no row matches, the first key carries no genre name, its value is
      None and the per-year mapping is empty.
    """
    # Convert the genre to lowercase to do the search regardless of upper or lower case
    genre_lower = genre.lower() if isinstance(genre, str) else None
    # Filter the DataFrame by the given genre
    genre_df = df_user_for_genre[df_user_for_genre['genres'].str.lower() == genre_lower]

    if genre_df.empty:
        return {"User with the most hours played for the genre": None, "Hours played per year": {}}

    # Total hours per user within the genre; the user with the largest total wins
    user_playtime_sum = genre_df.groupby('user_id')['playtime_forever'].sum()
    max_playtime_user = user_playtime_sum.idxmax()

    # Keep only the rows of that user
    user_df = genre_df[genre_df['user_id'] == max_playtime_user]

    # Sum per year rather than zipping the two columns: with several rows for the
    # same year a plain dict(zip(...)) keeps only the last row, so the per-year
    # figures would no longer add up to the total used to pick the user.
    # sort=False keeps the years in table order; dropna=False keeps a missing year
    # as its own entry. Missing playtime values contribute 0 to the sum.
    playtime_by_year = (
        user_df.groupby('release_year', sort=False, dropna=False)['playtime_forever']
        .sum()
        .to_dict()
    )

    return {f"User with the most hours played for the genre {genre}": max_playtime_user, "Hours played per year": playtime_by_year}


In [14]:
UserForGenre('action')

{'User with the most hours played for the genre action': 'Sp3ctre',
 'Hours played per year': {'1993': 0,
  '1998': 0,
  '1999': 44,
  '2002': 238,
  '2003': 7673,
  '2004': 127411,
  '2005': 21339,
  '2006': 896,
  '2008': 224,
  '2010': 78083,
  '2012': 378296,
  '2015': 312512,
  '2017': 43327,
  'no data': 84602}}

# Users Recommend

In [8]:
def UsersRecommend(year):
    '''
    Top 3 most recommended games among the reviews posted in a given year.

    Only rows of the module-level ``df_user_recommend`` table are counted where
    'posted' equals ``year``, 'recommend' is True and 'sentiment_analysis' is
    1 or 2 (neutral or positive). Each remaining row counts as one
    recommendation for its 'item_name'.

    Parameters:
    - year (int): The target year for filtering reviews.

    Returns:
    List with at most three single-entry dictionaries, ordered from the most to
    the least recommended game. An empty list is returned when nothing matches.
    Example:
    [{'Position 1: GameA': 30}, {'Position 2: GameB': 25}, {'Position 3: GameC': 20}]
    '''
    reviews = df_user_recommend

    # Build one boolean mask per condition, then combine them in a single selection
    posted_in_year = reviews['posted'].eq(year)
    is_recommended = reviews['recommend'].eq(True)
    is_positive_or_neutral = reviews['sentiment_analysis'].isin([1, 2])
    selected_items = reviews.loc[posted_in_year & is_recommended & is_positive_or_neutral, 'item_name']

    # value_counts() already sorts from most to least frequent
    podium = selected_items.value_counts().head(3)

    return [
        {"Position {}: {}".format(rank, game): votes}
        for rank, (game, votes) in enumerate(podium.items(), start=1)
    ]


In [9]:
UsersRecommend(2010)

[{'Position 1: Team Fortress 2': 20},
 {'Position 2: Killing Floor': 6},
 {"Position 3: Garry's Mod": 4}]

# Users Not Recommend

In [10]:
def UsersNotRecommend(year):
    '''
    Top 3 least recommended games among the reviews posted in a given year.

    Only rows of the module-level ``df_user_recommend`` table are counted where
    'posted' equals ``year``, 'recommend' is False and 'sentiment_analysis' is
    0 (negative). Each remaining row counts once for its 'item_name', and the
    games with the MOST such negative, not-recommended reviews are returned.

    Parameters:
    - year (int): The target year for filtering reviews.

    Returns:
    List with at most three single-entry dictionaries, ordered from the game
    with the most negative reviews downwards. An empty list is returned when
    nothing matches.
    Example:
    [{'Position 1: GameA': 30}, {'Position 2: GameB': 25}, {'Position 3: GameC': 20}]
    '''
    reviews = df_user_recommend

    # Build one boolean mask per condition, then combine them in a single selection
    posted_in_year = reviews['posted'].eq(year)
    is_not_recommended = reviews['recommend'].eq(False)
    is_negative = reviews['sentiment_analysis'].eq(0)
    selected_items = reviews.loc[posted_in_year & is_not_recommended & is_negative, 'item_name']

    # value_counts() already sorts from most to least frequent
    worst_three = selected_items.value_counts().head(3)

    ranking = []
    for position, (game, complaints) in enumerate(worst_three.items(), start=1):
        ranking.append({"Position {}: {}".format(position, game): complaints})
    return ranking



In [11]:
UsersNotRecommend(2014)

[{'Position 1: Dota 2': 342},
 {'Position 2: PAYDAY 2': 225},
 {'Position 3: War Thunder': 152}]

# Sentiment Analysis

In [12]:
def sentiment_analysis(year):
    '''
    Counts the reviews in each sentiment category for a given release year.

    Rows of the module-level ``df_sentiment_year`` table whose 'release_year'
    equals ``year`` are tallied by their 'sentiment_analysis' code:
    0 -> Negative, 1 -> Neutral, 2 -> Positive.

    Parameters:
    - year (int): The target release year for filtering reviews.

    Returns:
    Dictionary with the keys 'Negative', 'Neutral' and 'Positive' (in that order);
    a category with no reviews is reported as 0.
    Example:
    {'Negative': 10, 'Neutral': 20, 'Positive': 30}
    '''
    # Sentiment codes in the order they must appear in the result
    categories = ((0, 'Negative'), (1, 'Neutral'), (2, 'Positive'))

    # Tally the sentiment codes of the rows released in the requested year
    released_in_year = df_sentiment_year['release_year'] == year
    tally = df_sentiment_year.loc[released_in_year, 'sentiment_analysis'].value_counts().to_dict()

    return {label: tally.get(code, 0) for code, label in categories}


In [13]:
sentiment_analysis(2014)

{'Negative': 3667, 'Neutral': 3004, 'Positive': 8600}