<h1> Level Based Classification </h1>
<font color = "blue">
Content:

1. [Import Libraries](#1) 
2. [Load and Check Data](#2) 
3. [Numerical and Categorical Variables](#3) 
4. [Data Analysis](#4)

</font>


<a id="1"></a>
## Import Libraries

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt



import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)


<a id="2"></a>
## Load and Check Data

In [None]:
# Read the persona CSV from the Kaggle dataset mount and preview the first rows.
persona_csv = "/kaggle/input/persona-data/persona.csv"
df = pd.read_csv(persona_csv)
df.head()

In [None]:
def check_df(dataframe):
    """Print a quick structural overview of ``dataframe``.

    The report contains, in order: shape, first rows, last rows, column
    dtypes, missing-value counts per column, and selected quantiles of the
    numeric columns.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        The frame to inspect. Every section is computed from this argument,
        not from any notebook-level variable.

    Returns
    -------
    None
        The report is written to standard output.
    """
    print("****************** Shape ******************")
    print(dataframe.shape)
    print("****************** Head ******************")
    print(dataframe.head())
    print("****************** Tail ******************")
    print(dataframe.tail())
    print("****************** Dtypes ******************")
    print(dataframe.dtypes)
    print("****************** NA ******************")
    print(dataframe.isnull().sum())
    print("****************** Quantiles ******************")
    # numeric_only=True skips object/categorical columns, which would otherwise
    # make quantile() raise on pandas versions where numeric_only defaults to False.
    print(dataframe.quantile([0, 0.05, 0.50, 0.95, 0.99, 1], numeric_only=True).T)

In [None]:
# Print the structural overview of the loaded persona data.
check_df(dataframe=df)

<a id="3"></a>
## Numerical and Categorical Variables

In [None]:
# Split the columns by dtype. select_dtypes("number") matches every integer and
# float width, whereas comparing against the Python `int`/`float` types only
# matches the platform-default widths.
num_cols = df.select_dtypes(include="number").columns.tolist()
cat_cols = df.select_dtypes(include="object").columns.tolist()
print(f"Categorical Columns: {cat_cols}\nNumerical Columns: {num_cols}")

<a id="4"></a>
## Data Analysis

In [None]:
def cat_summary(dataframe, col_name, plot=False):
    """Print frequency information for one categorical column.

    Prints the value counts, a table of counts alongside their percentage of
    all rows, and the number of distinct values together with up to five of
    them. When ``plot`` is true a seaborn count plot is shown as well.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame that holds the column.
    col_name : str
        Name of the column to summarise.
    plot : bool, default False
        Whether to draw a count plot after printing.
    """
    column = dataframe[col_name]
    counts = column.value_counts()
    divider = "##############################"

    print("Frequency of Categorical Variable:\n{}".format(counts))
    print(divider)
    ratio_table = pd.DataFrame({col_name: counts,
                                "Ratio": 100 * counts / len(dataframe)})
    print(ratio_table)
    print(divider)
    print(f"Unique Number Of {col_name}: ", column.nunique(), column.unique()[:5])
    print(divider)

    if not plot:
        return
    sns.countplot(x=column, data=dataframe)
    plt.show()
def num_summary(dataframe, col_name, plot=False):
    """Print descriptive statistics for one numeric column.

    Prints ``describe()`` output with an extended list of percentiles and the
    number of distinct values. When ``plot`` is true a 20-bin histogram titled
    and labelled with the column name is shown as well.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame that holds the column.
    col_name : str
        Name of the column to summarise.
    plot : bool, default False
        Whether to draw the histogram after printing.
    """
    column = dataframe[col_name]
    percentiles = [0.05, 0.10, 0.20, 0.30, 0.40, 0.50, 0.60, 0.70, 0.80, 0.90, 0.95, 0.99]

    stats = column.describe(percentiles).T
    print(stats)
    print(f"Unique Number Of {col_name}: ", column.nunique())

    if not plot:
        return
    column.hist(bins=20)
    plt.xlabel(col_name)
    plt.title(col_name)
    plt.show()



In [None]:
# Frequency table and count plot for the SEX column.
cat_summary(dataframe=df, col_name="SEX", plot=True)

In [None]:
# Descriptive statistics and histogram for the AGE column.
num_summary(dataframe=df, col_name="AGE", plot=True)

In [None]:
# Total PRICE for every COUNTRY / SOURCE / SEX / AGE combination (first rows only).
(
    df.groupby(["COUNTRY", "SOURCE", "SEX", "AGE"])["PRICE"]
    .sum()
    .reset_index()
    .head()
)


In [None]:
# Sum PRICE per COUNTRY / SOURCE / SEX / AGE group, order the groups from highest
# to lowest total, and turn the group keys back into ordinary columns.
agg_df = (
    df.groupby(["COUNTRY", "SOURCE", "SEX", "AGE"])
    .agg({"PRICE": "sum"})
    .sort_values("PRICE", ascending=False)
    .reset_index()
)
agg_df.head()

In [None]:
# Age-band labels; each label corresponds to one interval between the bin edges below.
label_list = ['0_18', '19_23', '24_31', '32_41', '42_70']
# pd.cut intervals are right-closed by default, which would leave age 0 unbinned (NaN).
# include_lowest=True closes the first interval on the left so age 0 falls in '0_18',
# matching the `0 <= age <= 18` band used by user_detection. Ages above 70 still map to NaN.
agg_df["AGE_CAT"] = pd.cut(
    agg_df["AGE"],
    bins=[0, 18, 23, 31, 41, 70],
    labels=label_list,
    include_lowest=True,
)
agg_df["AGE_CAT"]

In [None]:
# Build the persona key COUNTRY_SEX_SOURCE_AGECAT (upper-cased) for every row.
col_list = ["COUNTRY", "SEX", "SOURCE", "AGE_CAT"]
agg_df["customers_level_based"] = ["_".join(row).upper() for row in agg_df[col_list].values]
# Average PRICE per persona. The column is selected explicitly with ["PRICE"]:
# a string passed to .agg() is meant to name an aggregation function, and a column
# name there only works through an attribute-lookup fallback.
# NOTE: this cell replaces agg_df with the persona-level table, so it cannot be
# re-run without first re-running the cells that build agg_df.
agg_df = agg_df.groupby("customers_level_based")["PRICE"].mean()
agg_df = agg_df.reset_index()


### Segment Creation

In [None]:
# Split the personas into PRICE quartiles; "D" is the lowest quartile and "A" the highest.
agg_df["SEGMENT"] = pd.qcut(agg_df["PRICE"], 4, labels=["D", "C", "B", "A"])
# Only the last expression of a cell is displayed, so the per-segment summary is
# printed explicitly instead of being computed and discarded.
# observed=True makes the categorical grouping explicit rather than relying on the default.
print(agg_df.groupby(["SEGMENT"], observed=True).agg({"PRICE": ["mean", "max", "sum"]}))
# Transposed describe() tables for the three upper segments, shown in the following cells.
A = agg_df[agg_df["SEGMENT"] == "A"].describe().T
B = agg_df[agg_df["SEGMENT"] == "B"].describe().T
C = agg_df[agg_df["SEGMENT"] == "C"].describe().T
agg_df.head()

In [None]:
# Transposed describe() statistics for the rows whose SEGMENT is "A".
A

In [None]:
# Transposed describe() statistics for the rows whose SEGMENT is "B".
B

In [None]:
# Transposed describe() statistics for the rows whose SEGMENT is "C".
C

In [None]:
# Bar chart of PRICE by SEGMENT; the trailing semicolon suppresses the text repr.
segment_ax = sns.barplot(data=agg_df, x="SEGMENT", y="PRICE")
segment_ax.set_title("PRICE-SEGMENT");

### Price Prediction Function

In [None]:
def user_detection(country, sex, sources, age, dataframe=None, col="customers_level_based"):
    """Look up the persona row(s) matching a new user's attributes.

    The persona key is built as ``COUNTRY_SEX_SOURCE_AGEBAND`` (text parts
    upper-cased) and compared against the ``col`` column of ``dataframe``.

    Parameters
    ----------
    country, sex, sources : str
        User attributes; case-insensitive because they are upper-cased here.
    age : int or float
        User age. Must lie in ``[0, 70]``. Bands are ``0_18``, ``19_23``,
        ``24_31``, ``32_41`` and ``42_70``; a fractional age falls into the
        band whose upper bound it does not exceed.
    dataframe : pandas.DataFrame, optional
        Persona table to search. When omitted, the notebook-level ``agg_df``
        is looked up at call time, so the most recent table is always used.
    col : str, default "customers_level_based"
        Column of ``dataframe`` that holds the persona keys.

    Returns
    -------
    pandas.DataFrame
        The rows of ``dataframe`` whose key equals the user's persona key
        (empty when no persona matches).

    Raises
    ------
    ValueError
        If ``age`` is outside ``[0, 70]`` (including NaN).
    """
    if dataframe is None:
        # Resolve the default lazily: binding agg_df in the signature would freeze
        # whatever object agg_df referred to when this cell was executed.
        dataframe = agg_df

    # (inclusive upper bound, label) pairs in ascending order.
    age_bands = ((18, "0_18"), (23, "19_23"), (31, "24_31"), (41, "32_41"), (70, "42_70"))
    max_age = age_bands[-1][0]
    if not 0 <= age <= max_age:
        raise ValueError(f"age must be between 0 and {max_age}, got {age!r}")
    age_cat = next(label for upper, label in age_bands if age <= upper)

    new_user = "_".join([country.upper(), sex.upper(), sources.upper(), age_cat])
    return dataframe[dataframe[col] == new_user]

### Prediction

In [None]:
# Persona lookup for a 32-year-old female iOS user from France.
user_detection(country="FRA", sex="FEMALE", sources="IOS", age=32)

In [None]:
# Persona lookup for a 25-year-old male Android user from Turkey.
user_detection(country="TUR", sex="MALE", sources="ANDROID", age=25)