In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

!pip install google_trans_new
from google_trans_new import google_translator 
translator = google_translator()  

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found below the Kaggle input
# directory, then print the paths one per line.
input_files = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# **INTRODUCTION**

The dataset has over 10k rows of sales orders from an e-commerce company in Turkey. All of the orders have already been delivered to the customers.
Let's hope the customers do not need these orders again :)

> **Load and Check Data**

In [None]:
# Location of the raw sales CSV inside the Kaggle input directory.
DATA_PATH = "../input/herbal-product-sales-ecommerce-20122013/BitkiselUrunSatisiVerileri.csv"

# Read the raw data and preview the first rows.
df = pd.read_csv(DATA_PATH)
df.head()

In [None]:
# Print a concise summary of the frame (columns, non-null counts, dtypes).
df.info()

**Variable Description**
1. id: Unique id number for purchase order of each customer.
2. date: Date of purchase order.
3. sex: male or female in Turkish.
4. city: Turkey's cities.
5. product category: Category of the purchased product.
6. MontH: month of the year.
7. season: Winter, summer and others.
8. region: One of the 7 regions of Turkey.


> **Univariate Variable Analysis**

* Categorical Variables : sex,city,product category, season, region, MontH
* Numerical Variables: id
* Time : date

> **Preprocessing Data**

In [None]:
# Normalise the column names: trim surrounding whitespace and join the words
# of a multi-word name with underscores ("product category" ->
# "product_category"). Joining the result of str.split() handles names with
# leading/trailing spaces or more than two words, which the previous
# split()[0] + "_" + split()[1] form either crashed on or truncated.
df.columns = ["_".join(each.split()) for each in df.columns]

# Lowercase every name (e.g. "MontH" -> "month").
df.columns = df.columns.str.lower()

# Drop the id field; it is not needed for the analysis.
# errors="ignore" keeps the cell re-runnable after the column is already gone.
df = df.drop(columns="id", errors="ignore")
print(df.columns)

# Sort by date ascending and renumber the index to follow the sorted order.
# NOTE(review): if "date" is still a string column this sort is lexicographic,
# not chronological - confirm the date format or parse it first.
df = df.sort_values("date", ascending=True).reset_index(drop=True)
df.head()

> **Translate all data to English with a Python Google Translate library**

In [None]:
# Two helper functions: one translates a value, the other replaces values with their translations.
def translateToEnglish(word):
    """Translate ``word`` from Turkish to English.

    The call is delegated to the notebook-level ``translator`` object, with
    Turkish ('tr') as the source language and English ('en') as the target.
    Whatever ``translator.translate`` returns is passed back unchanged.
    """
    return translator.translate(word, lang_src='tr', lang_tgt='en')

def replaceColumnValues(dictionary,categorical_features):
    """Replace original values in the global ``df`` with their translations.

    Parameters
    ----------
    dictionary : dict
        Maps an original value to its translation. Every translation is
        lowercased before it is applied.
    categorical_features : iterable of str
        Names of the ``df`` columns in which the replacement is performed.

    Notes
    -----
    ``df`` is modified by assigning each replaced column back to the frame.
    A chained ``df[col].replace(..., inplace=True)`` acts on a temporary
    Series and does not update ``df`` when pandas Copy-on-Write is active.

    After the generic replacement, a fixed set of known mistranslations in
    the "sex", "product_category" and "month" columns is corrected. The keys
    carry a trailing space, exactly as in the values they are matched against.
    """
    # Lowercase all of the translated values.
    dictionary = {k: v.lower() for k, v in dictionary.items()}

    # Replace the original values with their translations, column by column.
    for each in categorical_features:
        df[each] = df[each].replace(dictionary)

    # Manual corrections for known translation mistakes, grouped per column.
    corrections = {
        "sex": {"lady ": "female"},
        "product_category": {
            "hairdy ": "hair care",
            "weakening ": "obesity",
            "maintenance ": "personal care",
        },
        "month": {"engagement ": "april", "hazy ": "may"},
    }
    for column, fixes in corrections.items():
        df[column] = df[column].replace(fixes)


In [None]:
# Turkish value -> English translation, filled in below.
dictionary={}
# Column name -> array of the unique values found in that column.
categories = {}
#fetch only appropriate features/columns of dataset
columns=df.columns
categorical_features = [ each for each in columns if each not in ["id","date","city"] ]

# Collect (and show) the unique values of every column to be translated.
for each in categorical_features:
    categories[each] = df[each].unique()
    print({each: categories[each]} )

# Build the {Turkish key: English value} mapping. Every translation is a
# call to the translator, so a value that occurs in more than one column is
# translated only once.
for unique_values in categories.values():
    for turkish_value in unique_values:
        if turkish_value not in dictionary:
            dictionary[turkish_value] = translateToEnglish(turkish_value)

# Apply the translations to the frame.
replaceColumnValues(dictionary,categorical_features)

  

In [None]:

# Show the distinct values of each translated column, then preview the frame.
for column in ("sex", "product_category", "month", "season", "region"):
    print({column: df[column].unique()})
df.head()

> **Categorical Variables**

In [None]:
def bar_subplots(categorical_feature):
    """Draw a pie chart of how often each value of a column occurs.

    Despite its name, this function draws a pie chart, not a bar plot.

    Parameters
    ----------
    categorical_feature : str
        Name of a column of the global ``df``, e.g. "sex".

    The index of the value counts is printed, then a 6x6 figure titled with
    the column name is shown. Nothing is returned.
    """
    # Count the occurrences of each distinct value of the column.
    varValue = df[categorical_feature].value_counts()

    # One offset per wedge. The length is taken from the counts being plotted:
    # value_counts() leaves out missing values while unique() includes them,
    # so sizing from unique() gives a length mismatch when the column has NaN.
    explode = (0.25,) * len(varValue)

    print(varValue.index)

    # Visualize.
    plt.figure(figsize = (6,6))
    varValue.plot(kind="pie", explode=explode, startangle=40, autopct='%1.1f%%', shadow=True)
    plt.ylabel("Sales Amount")
    plt.title(categorical_feature)
    plt.show()
   

In [None]:
from sklearn.compose import make_column_selector as selector
# Select every object-dtype column of the frame.
column_selector = selector(dtype_include=object)
categorical_features = column_selector(df)
print("all categorical features: " ,categorical_features)

#select suitable categorical features
categorical_features = [ each for each in categorical_features if each not in ["id","date","city"] ]
print("suitable categorical features: ",categorical_features)

# Draw one chart per feature. A plain loop is used instead of
# list(map(bar_subplots, ...)): the function is called only for its side
# effect, and the map form built a throwaway list of None values that the
# notebook then echoed as the cell output.
for categorical_feature in categorical_features:
    bar_subplots(categorical_feature)