# In [None]:
import zlib
from datetime import datetime
from datetime import timedelta
from io import StringIO

import pandas as pd
import requests
from airflow.decorators import dag, task


# Absolute path of the sales CSV that the get_data task reads.
VG_SALES = '/var/lib/airflow/airflow.git/dags/a.batalov/vgsales.csv'
# Bare file name of the dataset; not referenced anywhere in the visible code.
VG_SALES_FILE = 'vgsales.csv'


# NOTE(review): this spelling differs from the 'owner' value in default_args
# ('s.chebrikov') — confirm it is the intended login.
login = 's-chebriklv'
# Year under analysis, derived from the login; always within 1994..2016.
# The built-in hash() of a str is salted per interpreter process (unless
# PYTHONHASHSEED is pinned), so it could produce a different year in every
# task process. CRC32 of the encoded login is stable across processes.
my_year = 1994 + zlib.crc32(login.encode('utf-8')) % 23


# Arguments handed to the @dag decorator as default_args.
default_args = dict(
    owner='s.chebrikov',
    depends_on_past=False,
    retries=2,
    retry_delay=timedelta(minutes=5),
    start_date=datetime(2023, 10, 17),
)



@dag(default_args=default_args, schedule_interval = '00 17 * * *', catchup=False)
def dag_sales_games_chebrikov():
    @task()
    def get_data():
        """Load the sales CSV and return it as CSV text.

        Returns:
            The file at ``VG_SALES`` parsed by pandas and serialised back to
            a CSV string without the index column.
        """
        return pd.read_csv(VG_SALES).to_csv(index=False)

    @task()
    def get_the_most_selling_game(sales_data):
        """Return the game with the highest global sales in ``my_year``.

        Args:
            sales_data: CSV text of the sales dataset.

        Returns:
            The ``Name`` whose summed ``Global_Sales`` is largest among rows
            where ``Year`` equals ``my_year``.
        """
        all_sales = pd.read_csv(StringIO(sales_data))
        year_sales = all_sales.loc[all_sales['Year'] == my_year].copy()

        totals_by_name = year_sales.groupby('Name', as_index=False).agg({'Global_Sales': 'sum'})
        ranked = totals_by_name.sort_values('Global_Sales', ascending=False)

        # Sorted descending, so the first row holds the top seller.
        return ranked['Name'].values[0]

    @task()
    def get_the_most_selling_genre_europe(sales_data):
        """Return the genre with the highest European sales in ``my_year``.

        Args:
            sales_data: CSV text of the sales dataset.

        Returns:
            The ``Genre`` whose summed ``EU_Sales`` is largest among rows
            where ``Year`` equals ``my_year``.
        """
        all_sales = pd.read_csv(StringIO(sales_data))
        year_sales = all_sales.loc[all_sales['Year'] == my_year].copy()

        totals_by_genre = year_sales.groupby('Genre', as_index=False).agg({'EU_Sales': 'sum'})
        ranked = totals_by_genre.sort_values('EU_Sales', ascending=False)

        # Sorted descending, so the first row holds the leading genre.
        return ranked['Genre'].values[0]

    @task()
    def get_platform_million_sales(sales_data):
        """Return the platform with the most ``NA_Sales > 1`` titles in ``my_year``.

        Args:
            sales_data: CSV text of the sales dataset.

        Returns:
            The ``Platform`` with the highest count of ``Name`` values among
            rows where ``Year`` equals ``my_year`` and ``NA_Sales`` exceeds 1.
        """
        all_sales = pd.read_csv(StringIO(sales_data))
        year_sales = all_sales.loc[all_sales['Year'] == my_year].copy()

        big_sellers = year_sales.query('NA_Sales > 1')
        titles_per_platform = big_sellers.groupby('Platform', as_index=False).agg({'Name': 'count'})
        ranked = titles_per_platform.sort_values('Name', ascending=False)

        # Sorted descending by title count, so the first row is the winner.
        return ranked['Platform'].values[0]

    @task()
    def get_publisher_mean_sales(sales_data):
        """Return the publisher with the highest mean Japanese sales in ``my_year``.

        Args:
            sales_data: CSV text of the sales dataset.

        Returns:
            The ``Publisher`` whose mean ``JP_Sales`` is largest among rows
            where ``Year`` equals ``my_year``.
        """
        all_sales = pd.read_csv(StringIO(sales_data))
        year_sales = all_sales.loc[all_sales['Year'] == my_year].copy()

        mean_by_publisher = year_sales.groupby('Publisher', as_index=False).agg({'JP_Sales': 'mean'})
        ranked = mean_by_publisher.sort_values('JP_Sales', ascending=False)

        # Sorted descending, so the first row holds the leading publisher.
        return ranked['Publisher'].values[0]


    @task()
    def get_games_sold_more_europe(sales_data):
        """Count the games that sold more in Europe than in Japan in ``my_year``.

        Args:
            sales_data: CSV text of the sales dataset.

        Returns:
            int: number of distinct ``Name`` values whose summed ``EU_Sales``
            is strictly greater than their summed ``JP_Sales`` among rows
            where ``Year`` equals ``my_year``.
        """
        df = pd.read_csv(StringIO(sales_data))
        df = df.loc[df['Year'] == my_year]

        totals = df.groupby('Name')[['EU_Sales', 'JP_Sales']].sum()

        # Summing a boolean mask counts its True entries directly, replacing
        # the earlier False/True -> 0/1 replacement and Python-level sum().
        # int() converts the NumPy scalar to a plain Python int.
        sold_more_in_europe = totals['EU_Sales'] > totals['JP_Sales']
        return int(sold_more_in_europe.sum())

    @task()
    def print_data(
        most_selling_game,
        most_selling_in_Europe,
        platform,
        publisher,
        sales_more_in_europe,
    ):
        """Print each finding for ``my_year`` on its own line, under a headline.

        Args:
            most_selling_game: value printed under the global best-seller headline.
            most_selling_in_Europe: value printed under the European genre headline.
            platform: value printed under the North American platform headline.
            publisher: value printed under the Japanese publisher headline.
            sales_more_in_europe: value printed under the Europe-vs-Japan headline.
        """
        # (headline, value) pairs, in the order they are printed.
        findings = (
            (f' Cамой продаваемой игрой в {my_year} году во всем мире была', most_selling_game),
            (f' Самыми продаваемыми в Европе играми в {my_year} году были жанры', most_selling_in_Europe),
            (f'Больше всего игр, которые продались более чем миллионным тиражом в Северной Америке в {my_year} году были на платформе', platform),
            (f'Самые высокие средние продажи в Японии в  {my_year} году были у издателя', publisher),
            (f'В {my_year} году игр продавалось больше в Европе, чем в Японии', sales_more_in_europe),
        )
        for headline, answer in findings:
            print(headline)
            print(answer)




    # Load the dataset once, pass it to each of the five analysis tasks, and
    # feed their results to the reporting task in the order it expects them.
    raw_csv = get_data()
    print_data(
        get_the_most_selling_game(raw_csv),
        get_the_most_selling_genre_europe(raw_csv),
        get_platform_million_sales(raw_csv),
        get_publisher_mean_sales(raw_csv),
        get_games_sold_more_europe(raw_csv),
    )

# Call the @dag-decorated function and rebind the module-level name to its
# result. Presumably this is what exposes the DAG to Airflow's discovery —
# confirm against the deployment's Airflow version.
dag_sales_games_chebrikov = dag_sales_games_chebrikov()