# Import necessary libraries

In [4]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt
from scipy import stats
import os
import glob
import math
sns.set_style("white")
pd.options.mode.chained_assignment = None  # default='warn'

# Importing dataframes

## Defining cleaning functions

In [5]:
def item_header_clean(df):
    column_map = {
    "index" : "League",
    "League": "Date",
    "Date": "Id",
    "Id" : "Type"
    }
    df_indexreset = df.reset_index()
    df_renamed = df_indexreset.rename(columns = column_map)
    return df_renamed

def item_info_fix(df):
    cols_to_drop = ["Links", "Variant"]
    df_cols_dropped = df.drop(cols_to_drop, axis = 1)
    df_cols_dropped.BaseType = df_cols_dropped.BaseType.fillna(df_cols_dropped.Type)
    df_fixed = df_cols_dropped
    return df_fixed

def item_top_40_filter(df):
    median_series = df.groupby("Name")["Value"].agg("median")
    sorted_series = median_series.sort_values(ascending = False)
    top_40_list = list(sorted_series[:40].index)
    df_top_40 = df.loc[df["Name"].isin(top_40_list)]
    return df_top_40

def add_relative_date(df):
    df_copy = df
    df_copy["Date"] = df_copy["Date"].astype('datetime64[D]')
    startdate = df_copy["Date"].min()
    df_copy["RelativeDate"] = (df_copy["Date"] - startdate)
    df_edited = df_copy.drop("Date", axis = 1)
    df_edited["RelativeDate"] = pd.to_timedelta(df_edited["RelativeDate"], unit = "D")
    df_edited["RelativeDateInt"] = df_edited["RelativeDate"] / np.timedelta64(1, 'D')
    return df_edited

def league_lifespan(row):
    early_league = dt.timedelta(days = 14)
    mid_league = dt.timedelta(days = 60)
    if row["RelativeDate"] <= early_league:
        return "Early"
    elif row["RelativeDate"] <= mid_league:
        return "Mid"
    return "End"

def item_file_clean(df):
    df1 = item_header_clean(df)
    df2 = item_info_fix(df1)
    df3 = item_top_40_filter(df2)
    df4 = add_relative_date(df3)
    df4["League Lifespan"] = df4.apply(league_lifespan, axis = 1)
    return df4

def currency_info_fix(df):
    currency_to_drop = ["Portal Scroll", "Scroll of Wisdom", "Armourer's Scrap", "Perandus Coin", "Orb of Transmutation", "Blacksmith's Whetstone", "Orb of Augmentation", "Orb of Alteration", "Splinter of Tul", "Chromatic Orb", "Splinter of Esh", "Splinter of Xoph", "Orb of Chance", "Glassblower's Bauble", "Splinter of Uul-Netol", "Silver Coin"]
    df_currency_fixed = df.loc[~(df["Get"].isin(currency_to_drop) | df["Pay"].isin(currency_to_drop))]
    return df_currency_fixed

def currency_file_clean(df):
    df1 = currency_info_fix(df)
    df2 = add_relative_date(df1)
    df2["League Lifespan"] = df2.apply(league_lifespan, axis = 1)
    return df2

## File Import and Concatenation

In [6]:
# Import/Concatenate item files
item_df_list = []
for filename in glob.glob("*_items.csv"):
    df = pd.read_csv(filename, delimiter = ";", low_memory = False)
    df_edited = item_file_clean(df)
    item_df_list.append(df_edited)
item_df = pd.concat(item_df_list, axis = 0)
del item_df_list
item_df.info()

ValueError: No objects to concatenate

In [None]:
#Import/Concatenate currency files
currency_df_list = []
for filename in glob.glob("*_currency.csv"):
    df = pd.read_csv(filename, delimiter = ";", low_memory = False)
    df_edited = currency_file_clean(df)
    currency_df_list.append(df_edited)
currency_df = pd.concat(currency_df_list, axis = 0)
del currency_df_list
currency_df.info()

## Paring down item table to only include items common across all leagues

In [None]:
def common_items(df):
    unique_items = []
    for league in df.League.unique():
        league_df = df[df.League == league]
        unique_items.append(league_df.Name.unique().tolist())
    return list(set(unique_items[0]).intersection(*unique_items))

In [None]:
list_of_items = common_items(item_df)

#Remove specific items that don't make sense to track due to variations or lack of accessibility
items_to_remove = ["Eyes of the Greatwolf", "Demigod's Dominance"]
for item in items_to_remove:
    list_of_items.remove(item)

item_df_pared = item_df.loc[item_df["Name"].isin(list_of_items)]

#Set up specific item lists that group items with possible dependencies on each other
hh_items = ["The Fiend", "The Doctor", "Headhunter"]
chayula_items = ["United in Dream", "The Blue Nightmare", "The Green Nightmare", "The Red Nightmare"]
misc_items = ["Starforge", "The Retch", "Atziri's Disfavour", "Emperor's Mastery", "Skyforth", "Atziri's Acuity"]
misc_pd = ["House of Mirrors", "Trash to Treasure", "Fated Connections"]
list_of_groups = [hh_items, chayula_items, misc_items, misc_pd]
item_group_names = ["Headhunter Related Items", "Chayula Related Items", "Miscellaneous Items", "Miscellaneous Prophecies/Divination Cards"]

## Pare down both item and currency table to only include the last 90 days and not the first day

In [None]:
item_df_pared = item_df_pared[(item_df_pared["RelativeDateInt"] <= 90) & (item_df_pared["RelativeDateInt"] > 1)]
currency_df_pared = currency_df[(currency_df["RelativeDateInt"] <= 90) & (currency_df["RelativeDateInt"] > 1)]

# Exploratory Data Analysis (Visualization and Inferential Statistics)

## Visualization - Item Trends

### By League

In [None]:
league_indexed_items = item_df_pared.set_index("League")
league_indexed_items = league_indexed_items.sort_index()
league_name_list = list(league_indexed_items.index.unique())
league_name_list

### By Group

## Visualization - Currency Trends

### By League

### By Group

## Inferential Statistics