# Import necessary libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plot
import seaborn as sns
import datetime as dt
from scipy import stats
import os
import glob
sns.set()
pd.options.mode.chained_assignment = None  # default='warn'
%matplotlib inline

# Importing dataframes

## Defining cleaning functions

In [2]:
def item_header_clean(df):
    """Shift the leading column labels of a raw item frame by one position.

    The existing index is turned into an ordinary column with
    ``reset_index`` and four labels are then renamed in one step:
    ``index`` -> ``League``, ``League`` -> ``Date``, ``Date`` -> ``Id`` and
    ``Id`` -> ``Type``.  All other columns keep their names.

    NOTE(review): presumably the raw CSV header is one field short, so the
    first data field is read as the index -- confirm against the files.

    Returns a new DataFrame; ``df`` itself is not modified.
    """
    # Pair each current label with the label that belongs one slot earlier.
    shifted_labels = dict(zip(
        ("index", "League", "Date", "Id"),
        ("League", "Date", "Id", "Type"),
    ))
    return df.reset_index().rename(columns = shifted_labels)

def item_info_fix(df):
    """Drop unused item columns and fill in missing base types.

    The ``Links`` and ``Variant`` columns are removed, and every missing
    ``BaseType`` value is replaced by the ``Type`` value of the same row.

    Returns a new DataFrame; ``df`` itself is not modified.
    """
    return (
        df.drop(columns = ["Links", "Variant"])
        .assign(BaseType = lambda frame: frame.BaseType.fillna(frame.Type))
    )

def item_top_15_filter(df):
    """Keep only the rows of the 15 items with the highest median ``Value``.

    The median ``Value`` is computed per ``Name``; rows whose ``Name`` is
    among the 15 largest medians are returned with their original index and
    order.  When there are 15 or fewer distinct names, every row is kept.
    """
    median_by_name = df.groupby("Name")["Value"].median()
    ranked = median_by_name.sort_values(ascending = False)
    keep_names = ranked.index[:15].tolist()
    is_top_item = df["Name"].isin(keep_names)
    return df.loc[is_top_item]

def add_relative_date(df):
    """Replace the ``Date`` column with ``RelativeDate``, the time since the first day.

    ``Date`` is parsed as a datetime and truncated to whole days.  The
    earliest day in the frame is subtracted from every row, giving a
    timedelta column named ``RelativeDate`` that is appended as the last
    column.  The ``Date`` column is dropped from the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``Date`` column that ``pd.to_datetime`` can parse.

    Returns
    -------
    pandas.DataFrame
        A new frame.  ``df`` is left untouched: the previous implementation
        aliased the input (``df_copy = df``) and therefore re-typed ``Date``
        and added ``RelativeDate`` on the caller's frame as well.
    """
    # ``astype('datetime64[D]')`` asks for a resolution that pandas 2.x does
    # not support (only s/ms/us/ns), so parse explicitly and truncate to
    # midnight to keep the day-level granularity.
    days = pd.to_datetime(df["Date"]).dt.normalize()
    # datetime - datetime already yields a timedelta column, so no further
    # ``pd.to_timedelta`` conversion is required.
    elapsed = days - days.min()
    return df.assign(RelativeDate = elapsed).drop(columns = "Date")

def league_lifespan(row):
    """Classify a row as ``"Early"``, ``"Mid"`` or ``"End"`` from its ``RelativeDate``.

    ``row["RelativeDate"]`` is compared against two inclusive cut-offs:
    up to 14 days is ``"Early"``, up to 60 days is ``"Mid"``, anything else
    is ``"End"``.
    """
    # Ordered from the earliest stage to the latest; the first match wins.
    stage_cutoffs = (
        ("Early", dt.timedelta(days = 14)),
        ("Mid", dt.timedelta(days = 60)),
    )
    for stage, cutoff in stage_cutoffs:
        if row["RelativeDate"] <= cutoff:
            return stage
    return "End"

def item_file_clean(df):
    """Run the complete item-cleaning pipeline on one raw item frame.

    Applies, in order, ``item_header_clean``, ``item_info_fix``,
    ``item_top_15_filter`` and ``add_relative_date``, then adds a
    ``League Lifespan`` column by calling ``league_lifespan`` on every row.

    Returns the cleaned DataFrame.
    """
    cleaned = (
        df.pipe(item_header_clean)
        .pipe(item_info_fix)
        .pipe(item_top_15_filter)
        .pipe(add_relative_date)
    )
    cleaned["League Lifespan"] = cleaned.apply(league_lifespan, axis = 1)
    return cleaned

def currency_info_fix(df):
    """Remove trades that involve any currency on a fixed exclusion list.

    A row is kept only when neither its ``Get`` value nor its ``Pay`` value
    appears in the list below.  Returns the filtered rows with their
    original index.
    """
    excluded_currencies = [
        "Portal Scroll",
        "Scroll of Wisdom",
        "Armourer's Scrap",
        "Perandus Coin",
        "Orb of Transmutation",
        "Blacksmith's Whetstone",
        "Orb of Augmentation",
        "Orb of Alteration",
        "Splinter of Tul",
        "Chromatic Orb",
        "Splinter of Esh",
        "Splinter of Xoph",
        "Orb of Chance",
        "Glassblower's Bauble",
        "Splinter of Uul-Netol",
        "Silver Coin",
    ]
    get_allowed = ~df["Get"].isin(excluded_currencies)
    pay_allowed = ~df["Pay"].isin(excluded_currencies)
    return df.loc[get_allowed & pay_allowed]

def currency_file_clean(df):
    """Run the complete currency-cleaning pipeline on one raw currency frame.

    Filters the frame with ``currency_info_fix``, converts its dates with
    ``add_relative_date``, then adds a ``League Lifespan`` column by calling
    ``league_lifespan`` on every row.

    Returns the cleaned DataFrame.
    """
    cleaned = add_relative_date(currency_info_fix(df))
    cleaned["League Lifespan"] = cleaned.apply(league_lifespan, axis = 1)
    return cleaned

## File Import and Concatenation

In [3]:
# Import/Concatenate item files
# glob.glob returns matches in arbitrary order; sorting keeps the row order of
# the concatenated frame reproducible between runs and machines.
item_paths = sorted(glob.glob("*_items.csv"))
if not item_paths:
    # Fail with a clear message instead of pd.concat's "No objects to concatenate".
    raise FileNotFoundError("No '*_items.csv' files found in " + os.getcwd())
item_df_list = []
for filename in item_paths:
    df = pd.read_csv(filename, delimiter = ";", low_memory = False)
    df_edited = item_file_clean(df)
    item_df_list.append(df_edited)
# Each file keeps its own row index, so index values may repeat across files.
item_df = pd.concat(item_df_list, axis = 0)
del item_df_list
item_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6164 entries, 68886 to 226813
Data columns (total 9 columns):
League             6164 non-null object
Id                 6164 non-null int64
Type               6164 non-null object
Name               6164 non-null object
BaseType           6164 non-null object
Value              6164 non-null float64
Confidence         6164 non-null object
RelativeDate       6164 non-null timedelta64[ns]
League Lifespan    6164 non-null object
dtypes: float64(1), int64(1), object(6), timedelta64[ns](1)
memory usage: 481.6+ KB


In [4]:
# Import/Concatenate currency files
# glob.glob returns matches in arbitrary order; sorting keeps the row order of
# the concatenated frame reproducible between runs and machines.
currency_paths = sorted(glob.glob("*_currency.csv"))
if not currency_paths:
    # Fail with a clear message instead of pd.concat's "No objects to concatenate".
    raise FileNotFoundError("No '*_currency.csv' files found in " + os.getcwd())
currency_df_list = []
for filename in currency_paths:
    df = pd.read_csv(filename, delimiter = ";", low_memory = False)
    df_edited = currency_file_clean(df)
    currency_df_list.append(df_edited)
# Each file keeps its own row index, so index values may repeat across files.
currency_df = pd.concat(currency_df_list, axis = 0)
del currency_df_list
currency_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 38885 entries, 356 to 9466
Data columns (total 7 columns):
League             38885 non-null object
Get                38885 non-null object
Pay                38885 non-null object
Value              38885 non-null float64
Confidence         38885 non-null object
RelativeDate       38885 non-null timedelta64[ns]
League Lifespan    38885 non-null object
dtypes: float64(1), object(5), timedelta64[ns](1)
memory usage: 2.4+ MB
