In [68]:
import pandas as pd
import numpy as np
import plotly.express as px
import datetime as dt
import plotly.graph_objects as go
import warnings
warnings.filterwarnings("ignore")

In [69]:
# --- Load the raw CSV tables -------------------------------------------------
holidays_events = pd.read_csv(
    "https://www.dropbox.com/s/bxyamlpevkiwwoq/holidays_events.csv?dl=1"
)
# Expose the holiday "type" under the name "holiday_type" (appended as the last
# column) and remove the original column; presumably this keeps it apart from
# the separate "type" column that is one-hot encoded later on.
holidays_events["holiday_type"] = holidays_events["type"]
holidays_events = holidays_events.drop(columns=["type"])

# Oil prices, loaded twice: once indexed by a parsed date, once as a plain table.
oil = pd.read_csv(
    "https://www.dropbox.com/s/l6ln0ztl4m0pw3a/oil.csv?dl=1",
    parse_dates=["date"],
    index_col="date",
)
oil2 = pd.read_csv("https://www.dropbox.com/s/l6ln0ztl4m0pw3a/oil.csv?dl=1")

#sample_submission = pd.read_csv("https://www.dropbox.com/s/68jjl61x6u3klos/sample_submission.csv?dl=1")
stores = pd.read_csv("https://www.dropbox.com/s/lcxn6r9bs2exguq/stores.csv?dl=1")

# Train and test are both indexed by their "id" column.
test = pd.read_csv(
    "https://www.dropbox.com/s/cvdo1gn7r5lu2uz/test.csv?dl=1", index_col="id"
)
train = pd.read_csv(
    "https://www.dropbox.com/s/s8p2b5awnuqfk0d/train.csv?dl=1", index_col="id"
)
transactions = pd.read_csv(
    "https://www.dropbox.com/s/92fij9bcwt0e0cj/transactions.csv?dl=1"
)

## Wizualizacja danych treningowych:

### 1. Wykres liczby sprzedanych artykułów w zależności od daty i w podziale na rodzaj artykułu

In [None]:
# Sum sales per (date, family) before plotting. `train` holds separate rows per
# store (it has a `store_nbr` column), so plotting it directly would draw each
# family's line through every store's value on the same date and push every raw
# row into the figure.
fig = px.line(
    train.groupby(['date', 'family'], as_index=False)['sales'].sum(),
    x='date',
    y='sales',
    color='family',
)
fig.show()

### 2. Wykres liczby sprzedanych artykułów w zależności od daty i w podziale na numer sklepu

In [None]:
# Sum sales per (date, store) before plotting. `train` holds separate rows per
# product family (it has a `family` column), so plotting it directly would draw
# each store's line through every family's value on the same date and push every
# raw row into the figure.
fig = px.line(
    train.groupby(['date', 'store_nbr'], as_index=False)['sales'].sum(),
    x='date',
    y='sales',
    color='store_nbr',
)
fig.show()

#### Funkcja pomocnicza

In [72]:
def przygotowanie_danych(df: pd.DataFrame, type, type_item):
    """
    Join the auxiliary tables onto ``df``, keep one family/store and one-hot
    encode the categorical columns.

    The function reads the module-level frames ``stores``, ``transactions``,
    ``holidays_events`` and ``oil2``.

    Parameters
    ----------
    df : pd.DataFrame
        Train- or test-style frame; must contain ``date``, ``store_nbr`` and
        ``family`` columns.
    type : str
        Column used to pick the subset: ``"family"`` or ``"store_nbr"``.
    type_item
        Value of ``df[type]`` to keep, e.g. ``type="family"``,
        ``type_item="AUTOMOTIVE"``.

    Returns
    -------
    pd.DataFrame
        Rows where ``df[type] == type_item`` with ``dayofweek`` (1-7) and
        ``month`` added, ``type``/``description``/``transferred`` removed and
        the remaining categorical columns one-hot encoded. When the input has
        no ``sales`` column, ``sales`` and a fixed list of dummy columns are
        added as zeros where they are absent.

    Raises
    ------
    ValueError
        If ``type`` is neither ``"family"`` nor ``"store_nbr"``.
    """
    # Validate first so an unsupported value fails with a clear message instead
    # of an unbound-variable error after all the merges have already run.
    if type == "family":
        type_opposite = "store_nbr"
    elif type == "store_nbr":
        type_opposite = "family"
    else:
        raise ValueError(f"type must be 'family' or 'store_nbr', got {type!r}")

    # Join the remaining data sets.
    df = df.merge(stores, how="left", on="store_nbr")
    df = df.merge(transactions, how="left", on=["date", "store_nbr"])
    # NOTE(review): if holidays_events holds several rows for one date, this
    # left join duplicates the matching rows of df — confirm this is intended.
    df = df.merge(holidays_events, how="left", on="date")
    df = df.merge(oil2, how="left", on="date")

    # Fill the gaps in the oil price. Every numeric column is interpolated (as
    # before, so e.g. missing transactions are filled too); the text columns are
    # left out explicitly because interpolating object columns is deprecated.
    numeric_cols = df.select_dtypes(include="number").columns
    df[numeric_cols] = df[numeric_cols].interpolate(
        method="linear", limit_direction="backward"
    )

    # Keep only the requested family/store. Copy so the column assignments
    # below do not write into a slice of df.
    df_fam = df.loc[df[type] == type_item].copy()

    # Day of week (1 = Monday ... 7 = Sunday) and month number.
    dates = pd.to_datetime(df_fam["date"])
    df_fam["dayofweek"] = dates.dt.dayofweek + 1
    df_fam["month"] = dates.dt.month

    # Drop the columns that are not turned into features.
    df_fam = df_fam.drop(columns=[type, "description", "transferred"])

    # One-hot encoding.
    categorical = ["locale", type_opposite, "city", "state", "type", "cluster", "locale_name", "holiday_type"]
    df_fam = pd.get_dummies(df_fam, columns=categorical, prefix=categorical)

    # The test set covers only a single month, so some dummy columns never
    # appear in it; add them (and `sales`) as all-zero columns.
    if 'sales' not in df_fam.columns:
        missing_cols = ['sales','locale_National','locale_Regional', 'locale_name_Cayambe', 'locale_name_Cotopaxi', 'locale_name_Cuenca', 'locale_name_Ecuador', 'locale_name_El Carmen', 'locale_name_Esmeraldas', 'locale_name_Guaranda', 'locale_name_Guayaquil', 'locale_name_Ibarra', 'locale_name_Imbabura', 'locale_name_Latacunga', 'locale_name_Libertad', 'locale_name_Loja', 'locale_name_Machala', 'locale_name_Manta', 'locale_name_Puyo', 'locale_name_Quevedo', 'locale_name_Quito', 'locale_name_Riobamba', 'locale_name_Salinas', 'locale_name_Santa Elena', 'locale_name_Santo Domingo', 'locale_name_Santo Domingo de los Tsachilas', 'holiday_type_Additional', 'holiday_type_Bridge', 'holiday_type_Event', 'holiday_type_Transfer', 'holiday_type_Work Day']
        for col in missing_cols:
            # Only add what is really absent; a dummy column that get_dummies
            # did produce must not be wiped to zero.
            if col not in df_fam.columns:
                df_fam[col] = 0

    return df_fam

In [73]:
# Feature frame for the AUTOMOTIVE family of the test set, without the
# placeholder `sales` column.
t = przygotowanie_danych(test, "family", "AUTOMOTIVE")
t = t.drop(columns=["sales"])


In [74]:
# Names of the one-hot store columns: store_nbr_1 ... store_nbr_54.
temp = ['store_nbr_' + str(nbr) for nbr in range(1, 55)]

In [75]:
# Predictions for AUTOMOTIVE, reduced to (sales, store_nbr, date).
automotive = pd.read_csv("predict_2022_05_18-22-13-14/AUTOMOTIVE.csv")
# Recover the store number from the one-hot columns: take the name of the
# column holding the row maximum and strip the "store_nbr_" prefix.
automotive["store_nbr"] = (
    automotive[temp].idxmax(axis=1).str[len("store_nbr_"):].astype(int)
)
# Dates are attached by row position from the prepared test frame.
automotive["date"] = t["date"].reset_index(drop=True)
automotive = automotive.rename(columns={"sales_pred": "sales"})[
    ["sales", "store_nbr", "date"]
]

In [76]:
# Historical AUTOMOTIVE sales; reset_index() turns the `id` index into a column.
train_automotive = train.loc[
    train["family"].eq("AUTOMOTIVE"), ["sales", "store_nbr", "date"]
].reset_index()

In [77]:
# Stack history and predictions, then average sales per date.
df_concat = (
    pd.concat([train_automotive, automotive], axis=0)
    .drop(columns=["id"])
    .groupby(["date"])["sales"]
    .mean()
    .to_frame()
    .reset_index()
)

I wersja wykresu

In [78]:
# Variant I: plotly express line chart of the per-date mean sales.
fig = px.line(data_frame=df_concat, x="date", y="sales")
fig.show()

II wersja wykresu

In [79]:
# Variant II: the same series drawn with the graph_objects API.
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_concat["date"], y=df_concat["sales"]))
fig.show()

A teraz trochę ładniej

In [96]:
def wykres(family: str, type: int, pred_dir: str = "predict_2022_05_18-22-13-14"):
    """
    Plot the per-date mean sales of one product family: history from ``train``
    followed by the predictions read from ``<pred_dir>/<family>.csv``.

    Uses the module-level ``train`` and ``test`` frames and
    ``przygotowanie_danych``.

    @param family: product family to plot, e.g. "AUTOMOTIVE"
    @param type: chart type: 1 - plain time series; 2 - time series with a
        range slider; 3 - time series with a range slider and range-selector
        buttons
    @param pred_dir: directory holding one prediction CSV per family
    @raise ValueError: if ``type`` is not 1, 2 or 3
    """
    # Reject a bad chart type before doing any of the expensive data work.
    if type not in (1, 2, 3):
        raise ValueError("Podano błędny numer typu wykresu!")

    # Only the dates of the prepared test rows are needed here; the prediction
    # file is assumed to be in the same row order — TODO confirm.
    test_features = przygotowanie_danych(test, "family", family)
    store_cols = [f"store_nbr_{nbr}" for nbr in range(1, 55)]

    pred = pd.read_csv(f"{pred_dir}/{family}.csv")
    # Recover the store number from the one-hot columns: name of the column
    # holding the row maximum, minus the "store_nbr_" prefix.
    pred["store_nbr"] = (
        pred[store_cols].idxmax(axis=1).str[len("store_nbr_"):].astype(int)
    )
    pred["date"] = test_features["date"].reset_index(drop=True)
    pred = pred.rename(columns={"sales_pred": "sales"})[["sales", "store_nbr", "date"]]

    history = train.loc[train["family"] == family, ["sales", "store_nbr", "date"]]

    # Mean sales per date over history and predictions together.
    df_concat = (
        pd.concat([history, pred], axis=0)
        .groupby("date")["sales"]
        .mean()
        .reset_index()
    )

    # Build the figure once; every chart type carries the same title.
    fig = px.line(df_concat, x="date", y="sales", title="Sprzedaż dla kategorii " + family)
    if type >= 2:
        fig.update_xaxes(rangeslider_visible=True)
    if type == 3:
        fig.update_xaxes(
            rangeselector=dict(
                buttons=[
                    dict(count=1, label="1m", step="month", stepmode="backward"),
                    dict(count=3, label="3m", step="month", stepmode="backward"),
                    dict(count=6, label="6m", step="month", stepmode="backward"),
                    dict(count=1, label="1y", step="year", stepmode="backward"),
                    dict(step="all"),
                ]
            )
        )
    fig.show()
    

In [98]:
# BABY CARE with range slider and range-selector buttons.
wykres(family="BABY CARE", type=3)