In [7]:
import numbers

import pandas as pd


StatementMeta(, a4621489-e558-49e0-92dc-3c9f07d2e282, 9, Finished, Available, Finished)

In [8]:
# Read the genre-level playtime table (funciones1.parquet) from the lakehouse
# into a pandas DataFrame, then show it as the cell output.
genres_playtime = pd.read_parquet(
    path="/lakehouse/default/Files/Final_data/ML/funciones1.parquet"
)
genres_playtime


StatementMeta(, a4621489-e558-49e0-92dc-3c9f07d2e282, 10, Finished, Available, Finished)

Unnamed: 0,Genres,Release,Playtime_Millon_Hours
0,Action,2000,1401.737284
1,Action,1999,1401.737284
2,Action,2003,1401.737284
3,Action,2001,1401.737284
4,Action,1998,1401.737284
...,...,...,...
702,Steampunk,2017,0.000016
703,Cartoon,2016,0.000691
704,Cartoony,2016,0.000691
705,Accounting,2016,0.000000


In [9]:
# Read the per-game playtime table (funciones2.parquet) from the lakehouse
# into a pandas DataFrame, then show it as the cell output.
games_playtime = pd.read_parquet(
    path="/lakehouse/default/Files/Final_data/ML/funciones2.parquet"
)
games_playtime


StatementMeta(, a4621489-e558-49e0-92dc-3c9f07d2e282, 11, Finished, Available, Finished)

Unnamed: 0,Item_id,Item_name,Playtime,Release
0,10,Counter-Strike,17107858.0,2000
1,20,Team Fortress Classic,960524.0,1999
2,30,Day of Defeat,756375.0,2003
3,40,Deathmatch Classic,154424.0,2001
4,50,Half-Life: Opposing Force,726545.0,1999
...,...,...,...,...
8738,528580,Found,0.0,2016
8739,528660,Spin Rush,1092.0,2016
8740,529670,The Bellows,0.0,2016
8741,529820,Beyond Power VR,0.0,2016


In [12]:
def top_genres_by_playtime(release_year, data=None):
    """
    Return the top 5 genres with the highest total playtime for a given release year.

    Parameters:
    release_year (int or float): The release year to filter the data. Any real
        number is accepted, including NumPy scalars such as ``numpy.int64``.
        Booleans are rejected even though ``bool`` is a subclass of ``int``.
    data (pandas.DataFrame, optional): Table with the columns 'Genres',
        'Release' and 'Playtime_Millon_Hours'. When omitted, the notebook-level
        ``genres_playtime`` DataFrame is used.

    Returns:
    - A DataFrame with the columns 'Genres' and 'Playtime_Millon_Hours' holding
      at most 5 rows, sorted by playtime in descending order.
    - A single-entry dictionary ``{message: None}`` if the input is not numeric
      or if no row has the requested release year.

    Notes:
    - The source DataFrame is never modified: 'Release' is converted to numbers
      on a temporary Series, so calling this function has no side effects on
      the table other cells are using.
    - 'Release' values that cannot be parsed as numbers become NaN and
      therefore never match any year.
    """
    # bool passes an int check, so it has to be excluded explicitly; numbers.Real
    # also covers NumPy integer/float scalars read back from a DataFrame.
    if isinstance(release_year, bool) or not isinstance(release_year, numbers.Real):
        return {"Invalid input. Please provide a numeric year.": None}

    source = genres_playtime if data is None else data

    # Coerce on a temporary Series instead of writing back into the shared frame
    release_numeric = pd.to_numeric(source['Release'], errors='coerce')
    year_mask = release_numeric == release_year

    # A mask without any hit means the year is absent from the data
    if not year_mask.any():
        return {f"There is no data available for the year {release_year}": None}

    # Sum the playtime per genre, rank descending and keep the first 5
    top_5_genres = (
        source.loc[year_mask]
        .groupby('Genres')['Playtime_Millon_Hours']
        .sum()
        .reset_index()
        .sort_values(by='Playtime_Millon_Hours', ascending=False)
        .head(5)
    )

    return top_5_genres



StatementMeta(, a4621489-e558-49e0-92dc-3c9f07d2e282, 14, Finished, Available, Finished)

In [15]:
# Example: the five most-played genres among titles released in 2016
top_genres_by_playtime(release_year=2016)

StatementMeta(, a4621489-e558-49e0-92dc-3c9f07d2e282, 17, Finished, Available, Finished)

Unnamed: 0,Genres,Playtime_Millon_Hours
8,Action,1401.737284
123,RPG,764.699523
11,Adventure,730.03842
76,Indie,687.478104
61,Free to Play,628.41791


In [19]:
def top_5_games_by_playtime(release_year, data=None):
    """
    Return the top 5 games with the highest playtime for a given release year.

    Parameters:
    release_year (int or float): The release year to filter the data. Any real
        number is accepted, including NumPy scalars such as ``numpy.int64``.
        Booleans are rejected even though ``bool`` is a subclass of ``int``.
    data (pandas.DataFrame, optional): Table with the columns 'Item_name',
        'Playtime' and 'Release'. When omitted, the notebook-level
        ``games_playtime`` DataFrame is used.

    Returns:
    - A DataFrame with the columns 'Item_name' and 'Playtime' holding at most
      5 rows, sorted by playtime in descending order.
    - A single-entry dictionary ``{message: None}`` if the input is not numeric
      or if no row has the requested release year.

    Notes:
    - The source DataFrame is never modified: 'Release' is converted to numbers
      on a temporary Series rather than written back into the table.
    - 'Release' values that cannot be parsed as numbers become NaN and
      therefore never match any year.
    """
    # bool passes an int check, so it has to be excluded explicitly; numbers.Real
    # also covers NumPy integer/float scalars read back from a DataFrame.
    if isinstance(release_year, bool) or not isinstance(release_year, numbers.Real):
        return {"Invalid input. Please provide a numeric year.": None}

    source = games_playtime if data is None else data

    # Coerce on a temporary Series instead of writing back into the shared frame
    release_numeric = pd.to_numeric(source['Release'], errors='coerce')
    year_mask = release_numeric == release_year

    # A mask without any hit means the year is absent from the data
    if not year_mask.any():
        return {f"There is no data available for the year {release_year}": None}

    # Rank the year's games by playtime (highest first) and keep the first 5
    top_5_games = (
        source.loc[year_mask]
        .sort_values(by='Playtime', ascending=False)
        .head(5)
    )

    # Return only the game name and playtime
    return top_5_games[['Item_name', 'Playtime']]



StatementMeta(, a4621489-e558-49e0-92dc-3c9f07d2e282, 21, Finished, Available, Finished)

In [25]:
# Example: the five most-played games released in 2016
top_5_games_by_playtime(release_year=2016)

StatementMeta(, a4621489-e558-49e0-92dc-3c9f07d2e282, 27, Finished, Available, Finished)

Unnamed: 0,Item_name,Playtime
1390,Starbound,138791545.0
1622,Heroes & Generals,111283464.0
6675,Stardew Valley,41600760.0
3795,Don't Starve Together,28835469.0
5186,Total War: WARHAMMER,19437372.0


In [22]:
def bottom_3_games_by_playtime(release_year, data=None):
    """
    Return the 3 games with the lowest playtime (greater than 0) for a given release year.

    Parameters:
    release_year (int or float): The release year to filter the data. Any real
        number is accepted, including NumPy scalars such as ``numpy.int64``.
        Booleans are rejected even though ``bool`` is a subclass of ``int``.
    data (pandas.DataFrame, optional): Table with the columns 'Item_name',
        'Playtime' and 'Release'. When omitted, the notebook-level
        ``games_playtime`` DataFrame is used.

    Returns:
    - A DataFrame with the columns 'Item_name' and 'Playtime' holding at most
      3 rows, sorted by playtime in ascending order. Games whose playtime is
      0 (or missing) are excluded.
    - A single-entry dictionary ``{message: None}`` if the input is not
      numeric, if no row has the requested release year, or if every game of
      that year has no playtime greater than 0.

    Notes:
    - The source DataFrame is never modified: 'Release' is converted to numbers
      on a temporary Series rather than written back into the table.
    - 'Release' values that cannot be parsed as numbers become NaN and
      therefore never match any year.
    """
    # bool passes an int check, so it has to be excluded explicitly; numbers.Real
    # also covers NumPy integer/float scalars read back from a DataFrame.
    if isinstance(release_year, bool) or not isinstance(release_year, numbers.Real):
        return {"Invalid input. Please provide a numeric year.": None}

    source = games_playtime if data is None else data

    # Coerce on a temporary Series instead of writing back into the shared frame
    release_numeric = pd.to_numeric(source['Release'], errors='coerce')
    year_mask = release_numeric == release_year

    # A mask without any hit means the year is absent from the data
    if not year_mask.any():
        return {f"There is no data available for the year {release_year}": None}

    # Keep only the year's games that were actually played
    played_games = source.loc[year_mask & (source['Playtime'] > 0)]

    # The year exists, but none of its games has a positive playtime
    if played_games.empty:
        return {f"No data available for year {release_year} with playtime greater than 0": None}

    # Rank by playtime (lowest first) and keep the first 3
    bottom_3_games = played_games.sort_values(by='Playtime', ascending=True).head(3)

    # Return only the game name and playtime
    return bottom_3_games[['Item_name', 'Playtime']]


StatementMeta(, a4621489-e558-49e0-92dc-3c9f07d2e282, 24, Finished, Available, Finished)

In [24]:
# Example: the three least-played (but played) games released in 2016
bottom_3_games_by_playtime(release_year=2016)

StatementMeta(, a4621489-e558-49e0-92dc-3c9f07d2e282, 26, Finished, Available, Finished)

Unnamed: 0,Item_name,Playtime
4563,ABRACA - Imagic Games,1.0
7502,Terror Lab,2.0
7817,A Long Way Home,2.0
