<a href="https://colab.research.google.com/github/junjunmeng/Data-Science--Cheat-Sheet/blob/master/Take_home_Assignment_template.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Table of Contents

*  Initial Data Analysis
*  Data Wrangling
*  Exploratory Data Analysis
*  Statistical Analysis
*  Machine Learning





In [None]:
pip install etsy_py

In [4]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import pandas_profiling as pp
# import etsy_py
from scipy.stats import shapiro
from scipy.stats import skew
from scipy.stats import kurtosis

from sklearn.cluster import DBSCAN
import sklearn as sklearn
from sklearn.utils import resample
from sklearn import preprocessing as preprocessing
from imblearn.over_sampling import SMOTE
from sklearn import metrics
from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve, classification_report, accuracy_score, mean_squared_error
from sklearn.metrics import precision_recall_curve, auc, make_scorer, recall_score, accuracy_score, precision_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score
import statsmodels.api as sm
import numpy as np

  import pandas.util.testing as tm


# <font color = 'blue'> 1. Data Cleaning </font>

#### Initial Analysis

In [None]:
def initial_analysis(df):
    """
    Print a short initial-analysis report for a dataframe.

    Params:
        - df: dataframe exposing an ``ID`` column (accessed as ``df.ID``)
    Returns:
        - None; the report is written to stdout and contains the shape,
          the per-column dtypes, the row count and the number of unique IDs
    """
    print('Report of Initial Data Analysis:\n')

    frame_shape = df.shape
    print(f'Shape of dataframe: {frame_shape}')

    column_types = df.dtypes
    print(f'Features and Data Types: \n {column_types}')

    row_count = df.shape[0]
    print(f"DataFrame Row Number:  {row_count}")

    unique_ids = df.ID.nunique()
    print(f"Unique IDs:  {unique_ids}")

#### Percentage of missing values

In [None]:
def percent_missing(df):
    """
    Compute, for every column, the share of missing records in percent.

    Params:
        - df: dataframe
    Returns:
        - dict mapping each column name to its percentage of missing
          values, rounded to two decimals
    """
    return {
        column: round(df[column].isna().mean() * 100, 2)
        for column in df.columns
    }

#### Missing value exploration

In [None]:
# missing values in a column, especially response variable
def missing_class(df, col_name):
    """
    Report and drop the rows whose value in ``col_name`` is missing.

    A value counts as missing when it is NaN/None or one of the
    placeholder strings "No idea", "#" or "?". The dataframe is modified
    in place: placeholders are replaced by NaN and the affected rows are
    dropped.

    Params:
        - df: dataframe (mutated in place)
        - col_name (str): column to clean, typically the response variable
    Returns:
        - number of rows that were identified as missing and removed
    """
    placeholders = ["No idea", "#", "?"]

    # Count NaN and placeholder values together so the reported number
    # matches the number of rows actually dropped below.
    is_missing = df[col_name].isna() | df[col_name].isin(placeholders)
    n_missing = int(is_missing.sum())
    print("Number of missing rows within " + col_name + " :" + str(n_missing))

    # replace missing or abnormal values with np.nan, then drop them
    df.loc[is_missing, col_name] = np.nan
    df.dropna(subset=[col_name], inplace=True)

    # check the cleaned column
    print(df.groupby(col_name).size())
    return n_missing


#### Converting data types for selected columns

In [None]:
# covert dataframe columns to numeric or other types

def convert_num(df, columns):
    """
    Cast the given columns of a dataframe to ``int64`` in place.

    Params:
        - df: dataframe (mutated in place)
        - columns: iterable of column names to convert
    Returns:
        - the same dataframe object, with the listed columns converted
    """
    for column in columns:
        df[column] = df[column].astype("int64")
    return df

    

#### Duplicate check, basically two situations
1. duplicated identical rows
2. same ID with different values

In [None]:
# any duplicate rows? especially in ID

def duplicate_row_remove(df, col_name):
    """
    Remove rows that are identical from ``col_name`` to the last column.

    Columns located before ``col_name`` (for example a running row index)
    are ignored when deciding whether two rows are duplicates. The first
    occurrence of each duplicated row is kept.

    Params:
        - df: dataframe (not modified)
        - col_name (str): first column considered for the comparison, e.g. "ID"
    Returns:
        - the de-duplicated dataframe; it is also stored in the global
          ``breast`` for backward compatibility
    """
    global breast  # dataframe name
    original_length = len(df)
    print("Number of unique IDs: " + str(df[col_name].nunique()))

    # compare only the columns from col_name onwards
    is_duplicate = df.loc[:, col_name:].duplicated()
    df = df[~is_duplicate]

    rows_removed = original_length - len(df)
    print("Number of identical replicated rows should removed: " + str(rows_removed))
    print("Number of remaining rows: " + str(len(df)))
    breast = df
    return df

In [None]:
# same user ID with different values

def duplicate_ID(df, col_name, order_col="Index"):
    """
    Keep a single row per value of ``col_name``.

    Rows are sorted by ``col_name`` and then by ``order_col``; for every
    repeated value of ``col_name`` only the first row in that order (the
    one with the smallest ``order_col``) is kept.

    Params:
        - df: dataframe (not modified)
        - col_name (str): identifier column, e.g. "ID"
        - order_col (str or None): tie-break column, "Index" by default.
          With None, the first row in the original order is kept.
    Returns:
        - the dataframe with one row per identifier; it is also stored in
          the global ``breast`` for backward compatibility
    """
    global breast  # dataframe name
    n_duplicated = int(df[col_name].duplicated().sum())
    print("Number of duplicated ID: " + str(n_duplicated))

    sort_keys = [col_name] if order_col is None else [col_name, order_col]
    # a stable sort keeps the original order among equal keys
    df = df.sort_values(by=sort_keys, ascending=True, kind="stable")
    df = df[~df[col_name].duplicated(keep='first')]
    print("Number of remaining rows: " + str(len(df)))
    breast = df
    return df

#### Check and remove out-of-range values

In [None]:
def Incorrect_feature_values(df, columns, val, col_name):
    """
    Remove every record whose identifier has an out-of-range feature value.

    An identifier is flagged when at least one of its rows has a value
    greater than ``val`` in any of ``columns``. All rows sharing a flagged
    identifier are removed, not only the offending row.

    Params:
        - df (dataframe): dataframe (not modified)
        - columns (list): columns whose value range is checked
        - val (int): upper bound; values strictly greater are out of range
        - col_name (str): identifier column, e.g. "ID"
    Returns:
        - the filtered dataframe; it is also stored in the global
          ``breast`` for backward compatibility
    """
    global breast
    # True for each row that exceeds the bound in at least one column
    exceeds = (df[list(columns)] > val).any(axis=1)

    # identifiers of users with out-of-range values
    users_with_large_vals = df.loc[exceeds, col_name].unique()
    print("There are " + str(len(users_with_large_vals)) + " users with values that exceed " + str(val))

    # remove every record of those users
    df = df[~df[col_name].isin(users_with_large_vals)]
    breast = df
    return df

#### Re-assign value to a column

In [None]:
# rename the class based on code
def reClass(df, col_name):
    """
    Recode a class column from the codes 2 / 4 to the labels 0 / 1.

    Values equal to 2 become 0 and values equal to 4 become 1. Rows with
    any other value are removed. The recoded column keeps the name
    ``col_name`` and is placed last in the returned dataframe. The input
    dataframe is not modified.

    Params:
        - df (dataframe): dataframe
        - col_name (str): column to recode, e.g. "Class"
    Returns:
        - dataframe with the recoded column; it is also stored in the
          global ``breast`` for backward compatibility
    """
    global breast
    # values missing from the mapping become NaN
    recoded = df[col_name].map({2: 0, 4: 1})

    # NaN never compares equal to itself, so use notna() to find valid rows
    keep = recoded.notna()

    result = df.loc[keep].drop(columns=col_name)
    result[col_name] = recoded[keep].astype("int64").to_numpy()
    breast = result
    return result


#### Aggregation summary of each columns in a dataframe

In [None]:
# check the aggregation for each column
def col_agg(df, col_list):
    """
    Count the occurrences of every distinct value for each listed column.

    Params:
        - df: dataframe
        - col_list: iterable of column names
    Returns:
        - list with one Series per column (value -> number of rows)
    """
    return [df.groupby(column).size().reindex() for column in col_list]

# eg.  col_agg(breast, breast.columns[1:12])

# <font color = 'blue'> 2. Exploratory Data Analysis </font>

#### Set Profile Report

In [None]:
pip install --upgrade pandas_profiling
def profile(df):
    """
    Build a pandas-profiling report for a dataframe and save it as HTML.

    Params:
      - df(dataframe): dataframe
    Returns: the ProfileReport object; it is also stored in the global
      ``prof`` and written to "data_profile.html"
    """
    global prof
    from pandas_profiling import ProfileReport

    report = ProfileReport(df)
    prof = report
    report.to_file(output_file="data_profile.html")
    return report

#### Count plot in Seaborn

In [None]:
def sub_countplot(df, Class):
    """
    Draw one count plot per feature, split by the response variable.

    The features are all columns except the first and the last one. The
    plots are laid out on a grid with three columns and as many rows as
    needed.

    Params:
      - df(dataframe): dataframe
      - Class(str): col_name, e.g "Class", usually response variable
    Returns: None; the count plots are drawn on a new figure
    """
    features = df.columns[1:-1].to_list()
    n_cols = 3
    # ceiling division, with at least one row so the grid is never empty
    n_rows = max(1, -(-len(features) // n_cols))

    # squeeze=False keeps `axes` two-dimensional even for a single row
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 15), squeeze=False)
    for i, feature in enumerate(features):
        sns.countplot(x=feature, hue=Class, data=df, ax=axes[i // n_cols, i % n_cols])

    # hide the cells of the grid that received no plot
    for unused_ax in axes.flat[len(features):]:
        unused_ax.set_visible(False)


#### Check normality

In [None]:
def normality_test(df,col_list):
    """
    Run the Shapiro-Wilk normality test on each listed numerical column.

    H0 = the sample comes from a Gaussian distribution
    Ha = the sample does not come from a Gaussian distribution
    H0 is rejected when the p-value is <= 0.05.

    For every column the distribution is plotted and its skewness and
    kurtosis are printed. Missing values are dropped before testing so a
    single NaN cannot turn the p-value into NaN and hide a non-Gaussian
    column.

    Params:
        - df
        - col_list: names of the numerical columns to test
    Returns:
        - dict mapping each column for which H0 was rejected (not
          Gaussian) to its W statistic
    """
    alpha = 0.05
    norm_dict = {}
    for n in col_list:
        values = df[n].dropna()
        stat, p = shapiro(values)
        print(sns.distplot(values))
        print(n, "skew is:", skew(values), "kurtosis is :", kurtosis(values))

        if p <= alpha:  # Reject H0 -- distribution is not normal
            norm_dict[n] = stat
    return norm_dict

#### Outliers by Boxplot

In [None]:
# Outliers by boxplot
# Columns at positions 2..10 of the global `breast` dataframe.
col_names = breast.columns[2:11]
# Bare expression kept from the notebook; presumably meant to display the
# selected column names -- TODO confirm it is still needed.
col_names

# Box plot of the feature columns, one box per column.
# NOTE(review): the list below is hard-coded and presumably matches
# `col_names` above -- verify against the dataframe.
breast.boxplot(column=['Clump Thickness', 'Uniformity of Cell Size',
       'Uniformity of Cell Shape', 'Marginal Adhesion',
       'Single Epithelial Cell Size', 'Bare Nuclei', 'Bland Chromatin',
       'Normal Nucleoli','Mitoses' ], grid=False, rot=90, fontsize=11)


#### Data transformation by boxcox

In [None]:
# data transformation?
from scipy.stats import boxcox
# Histogram of the Box-Cox transform of 'Clump Thickness' with lambda fixed to 1.
# NOTE(review): per the SciPy formula (x**lmbda - 1) / lmbda, lambda = 1 only
# shifts the data by -1; omit the second argument to let SciPy estimate lambda
# (boxcox then returns a (transformed, lambda) tuple) -- confirm the intent.
plt.hist(boxcox(breast['Clump Thickness'],1))



#### Visualize the binary classes count

In [None]:
def class_fig(col):
    """
    Visualize how many records fall in each class of the response variable.

    Params:
      - col: Series holding the response variable
    Returns:
      - None; a count plot is shown
    """
    # Pass the Series explicitly as `x`: recent seaborn versions interpret
    # the first positional argument as `data`.
    sns.countplot(x=col, label='Count')
    plt.show()

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report, confusion_matrix

#### Correlation heatmap

In [None]:
# correlation between features

def corr_heatmap(df, col_names):
    """
    Show an annotated heatmap of the pairwise correlations of the columns.

    The diagonal and the upper triangle are masked, so each pair of
    columns appears once.

    Params:
      - df: dataframe
      - col_names: columns to correlate
    Returns:
      - None; the heatmap is shown
    """
    correlations = df[col_names].corr()
    # 1.0 on and above the diagonal (hidden cells), 0.0 below it
    upper_mask = np.triu(np.ones_like(correlations))
    sns.heatmap(correlations, annot=True, vmin=-1, vmax=1, mask=upper_mask)
    plt.show()


#### Feature Importance by Random Forest

In [None]:
# feature importance by Random Forest

def rf_importance(X, y, random_state=42):
    """
    Plot the feature importances of a random forest fitted on a 70% split.

    Params:
      - X: dataframe of features
      - y: response variable
      - random_state: seed of the random forest, so the ranking is
        reproducible between runs (the split itself is seeded with 42)
    Returns:
      - Series of feature importances indexed by feature name, sorted in
        ascending order
    """
    # only the training part of the split is used here
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.3, random_state=42)
    rf = RandomForestClassifier(random_state=random_state)
    rf.fit(X_train, y_train)

    # Feature Importance
    importance_rf = pd.Series(rf.feature_importances_, index=X_train.columns)
    sorted_importance_rf = importance_rf.sort_values()
    sorted_importance_rf.plot(kind='barh', color='lightgreen')
    plt.title("Feature Importance by Random Forest")
    plt.show()
    return sorted_importance_rf

# <font color = 'blue'> 3. Modeling

#### <font color = 'red'> Model_util package

In [None]:
from functools import partial
import copy

import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import joblib 

from sklearn.metrics import confusion_matrix, cohen_kappa_score, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV, PredefinedSplit, KFold
from sklearn.ensemble import RandomForestRegressor

from xgboost.sklearn import XGBRegressor, XGBClassifier
import lightgbm as lgb



class ClassifierModel(object):
    """
    A cross-validation wrapper for classification model wrappers.
    It can be used for training and prediction.
    Can plot feature importance and training progress (if relevant for model).
    """

    def __init__(self, model_wrapper=None):
        """
        Args:
            model_wrapper: model wrapper exposing ``fit``, ``predict``,
                ``best_score_`` and ``feature_importances_``; it is
                deep-copied for every cross-validation fold.
        """
        self.model_wrapper = model_wrapper

    def save_model(self, save_path):
        """Serialize this object (including fitted models) to ``save_path`` with joblib."""
        joblib.dump(self, save_path, compress=1)

    def fit(self, X, y,
            n_splits,
            params=None,
            eval_metric='logloss',
            plot=True,
            plot_title=None,
            verbose=1):
        """
        Training the model.

        Args:
            X (pd.DataFrame), y: training data.
            n_splits: number of cross-validation folds.
            params (dict): training parameters passed to the model wrapper. Including hyperparameters and:
                params['objective'] (str): 'regression' or 'classification',
                params['verbose'] (bool),
                params['cat_cols'] (list): categorical_columns, only used in LGB and CatBoost wrappers.
                params['early_stopping_rounds'] (int).
            eval_metric (str): metric for validation.
            plot (bool): if true, plot 'feature importance' and 'training curve'.
            plot_title (str): optional title of the figure.
            verbose (int): 0 is silent, 1 prints averaged scores, >1 also prints per-fold scores.
        """
        self.eval_metric = eval_metric
        self.verbose = verbose
        folds = KFold(n_splits=n_splits, shuffle=True, random_state=42)
        self.columns = X.columns.to_list()

        self.models = []  # if n_splits=5, save 5 models.
        self.scores = []  # if n_splits=5, save 5 score items. Each is like: {'validation_0': {'rmse': 396.888855}, 'validation_1': {'rmse': 417.889496}}
        self.feature_importances = pd.DataFrame(columns=['feature', 'gain'])  # replaced below by the stack of the per-fold importances.
        self.oof = np.full(X.shape[0], np.nan)  # Predicted results using cross-validation. OOF: "Out-of-fold".

        fold_importances = []
        for fold_n, (train_index, valid_index) in enumerate(folds.split(X, y)):
            X_train, X_valid = X.iloc[train_index].copy(), X.iloc[valid_index].copy()
            y_train, y_valid = y.iloc[train_index], y.iloc[valid_index]
            model = copy.deepcopy(self.model_wrapper)
            model.fit(X_train, y_train, X_valid, y_valid, params=params)

            self.models.append(model)
            self.scores.append(model.best_score_)
            self.oof[valid_index] = model.predict(X_valid).reshape(-1,)

            fold_importances.append(pd.DataFrame({
                'feature': X_train.columns,
                'gain': model.feature_importances_
            }))

            if self.verbose > 1:
                print(f'\nFold {fold_n} started.')
                for val in model.best_score_.keys():
                    print(f"{self.eval_metric} score on {val}: {model.best_score_[val][self.eval_metric]:.3f}.")

        # DataFrame.append was removed in pandas 2.0; stack the folds with concat.
        if fold_importances:
            self.feature_importances = pd.concat(fold_importances)

        self.calc_scores_()

        if plot:
            # A bare figure avoids an empty full-size axes behind the subplots.
            plt.figure(figsize=(32, 8))
            if plot_title is not None:
                plt.suptitle(plot_title)
            plt.subplot(1, 3, 1)
            self.plot_feature_importance()
            plt.subplot(1, 3, 2)
            self.plot_learning_curve()

    def predict(self, X_test, averaging='usual'):
        """
        Make prediction by averaging the predictions of the fold models.

        Args:
            X_test (pd.DataFrame): test data
            averaging: 'usual' averages the raw predictions, 'rank'
                averages the rank of each prediction within its model.

        Return:
            np.ndarray: averaged prediction for X_test

        Raises:
            ValueError: if ``averaging`` is neither 'usual' nor 'rank'.
        """
        if averaging not in ('usual', 'rank'):
            # an unknown mode used to return all zeros silently
            raise ValueError(f"Unknown averaging method: {averaging!r}; expected 'usual' or 'rank'.")

        full_prediction = np.zeros(X_test.shape[0])
        for model in self.models:
            y_pred = model.predict(X_test).reshape(-1)
            if averaging == 'usual':
                full_prediction += y_pred
            else:
                full_prediction += pd.Series(y_pred).rank().values
        return full_prediction / len(self.models)

    def calc_scores_(self):
        """
        Average the scores from the n_splits cross validation.

        Stores the result in ``self.ave_scores`` as {set name: mean score}.
        """
        self.ave_scores = {}
        sets = list(self.scores[0])  # sets = ['validation_0', 'validation_1']
        print('\nFinished cross-validation training.')
        for val in sets:
            scores = [score[val][self.eval_metric] for score in self.scores]
            if self.verbose:
                print(f"CV mean {self.eval_metric} score on {val}: {np.mean(scores):.3f} +/- {np.std(scores):.3f} std.")
            self.ave_scores[val] = np.mean(scores)  # self.ave_scores: {'validation_0': 398.9524596, 'validation_1': 408.9034486}

    def plot_feature_importance(self, drop_null_importance=True, top_n=20):
        """
        Plot feature importance.

        Args:
            drop_null_importance (bool): drop columns with null feature importance
            top_n (int): show top n features.
        """
        top_feats = self.get_top_features(drop_null_importance, top_n)
        is_top = self.feature_importances.loc[:, 'feature'].isin(top_feats)
        # copy so the string conversion does not write into a slice of self.feature_importances
        feature_importances = self.feature_importances.loc[is_top].copy()
        feature_importances['feature'] = feature_importances['feature'].astype(str)
        top_feats = [str(i) for i in top_feats]
        sns.barplot(data=feature_importances, x='gain', y='feature', orient='h', order=top_feats)
        plt.title("Feature Importance")

    def get_top_features(self, drop_null_importance=True, top_n=20):
        """
        Get top features by importance.

        Args:
            drop_null_importance (bool): drop columns with null feature importance
            top_n (int): number of features to return.

        Return:
            list: feature names ordered by decreasing mean gain over folds.
        """
        grouped_feats = self.feature_importances.groupby(['feature'])['gain'].mean()  # average over folds.
        if drop_null_importance:
            grouped_feats = grouped_feats[grouped_feats != 0]
        return list(grouped_feats.sort_values(ascending=False).index)[:top_n]

    def plot_learning_curve(self):
        """
        Plot training learning curve.
        Inspired by `plot_metric` from https://lightgbm.readthedocs.io/en/latest/_modules/lightgbm/plotting.html

        An example of model.evals_result_:
            {
                'validation_0': {'rmse': [0.259843, 0.26378, 0.26378, ...]},
                'validation_1': {'rmse': [0.22179, 0.202335, 0.196498, ...]}
            }

            'validation_0' represent train set;
            'validation_1' represent validation set;
        """
        per_model_results = []
        for model in self.models:
            evals = model.model.evals_result_
            # one column per data set, one row per boosting iteration
            evals_result = pd.DataFrame({k: evals[k][self.eval_metric] for k in evals.keys()})
            evals_result = evals_result.reset_index().rename(columns={'index': 'iteration'})
            per_model_results.append(evals_result)

        # DataFrame.append was removed in pandas 2.0; stack the folds with concat.
        full_evals_results = pd.concat(per_model_results)
        full_evals_results = full_evals_results.melt(id_vars=['iteration']).rename(
            columns={'value': self.eval_metric, 'variable': 'dataset'})
        sns.lineplot(data=full_evals_results, x='iteration', y=self.eval_metric, hue='dataset')
        plt.title('Train Learning-Curve')
        
        
#################################################################
# Model Wrappers.
#################################################################

class RandForest_regr(object):
    """
    A wrapper for sklearn RandomForestRegressor model so that we will have a single api for various models.

    Example of params:
    params = {
        'n_estimators': 100,
        'criterion': 'mse',
        'max_depth': 7,
        'min_samples_split': 2,
        'n_jobs': -1,
        'random_state': 123,
        'verbose': 0,
    }
    """

    def __init__(self):
        self.model = RandomForestRegressor()

    def fit(self, X_train, y_train, X_valid=None, y_valid=None, params=None):
        """
        Fit the forest on the training data.

        Args:
            X_train, y_train: training data.
            X_valid, y_valid: accepted for API compatibility; not used.
            params (dict): RandomForestRegressor parameters; None keeps
                the current parameters of the model.

        Sets ``best_score_`` to the mean squared error on the training
        data and ``feature_importances_`` to the fitted importances.
        """
        # `**None` raises TypeError, so fall back to an empty mapping
        self.model.set_params(**(params or {}))
        self.model.fit(X=X_train, y=np.array(y_train).reshape(-1))
        score = mean_squared_error(y_train, self.model.predict(X_train))
        self.best_score_ = score
        self.feature_importances_ = self.model.feature_importances_

    def predict(self, X_test):
        """Return the predictions of the fitted forest for ``X_test``."""
        return self.model.predict(X_test)
        
        

class XGBWrapper_clf(object):
    """
    A wrapper for xgboost classifier so that we will have a single api for various models.

    Example of params:
    params = {
        'n_estimators': 50,
        'max_depth':  3,
        'learning_rate': 0.01,
    #     'min_child_weight': np.arange(1, 4, 1),
    #     'gamma': np.arange(0, 0.03, 0.01),
    #     'reg_alpha': np.arange(0, 0.01, 0.003),
        'objective': 'binary:logistic',  # classification objective
        'verbose': 0,
        'early_stopping_rounds': None,
        'n_jobs': -1,
        'random_state': 123
    }
    """

    def __init__(self):
        self.model = XGBClassifier()

    def fit(self, X_train, y_train, X_valid=None, y_valid=None, params=None):
        """
        Fit the classifier and record the last 'logloss' of each evaluation set.

        Args:
            X_train, y_train: training data, always used as first evaluation set.
            X_valid, y_valid: optional validation data, used as second evaluation set.
            params (dict): model parameters; 'verbose' and
                'early_stopping_rounds' are also forwarded to ``fit``.
                None or missing keys fall back to verbose=True and no
                early stopping.

        Sets ``best_score_`` as {set name: {metric: last value}} and
        ``feature_importances_`` from the fitted model.
        """
        # `**None` and `None['verbose']` both raise TypeError
        params = dict(params) if params else {}
        self.model = self.model.set_params(**params)

        eval_set = [(X_train, y_train)]
        if X_valid is not None:
            eval_set.append((X_valid, y_valid))

        self.model.fit(X=X_train, y=y_train,
                       eval_set=eval_set, eval_metric='logloss',
                       verbose=params.get('verbose', True),
                       early_stopping_rounds=params.get('early_stopping_rounds'))

        scores = self.model.evals_result()
        # keep only the value of the last iteration for every metric
        self.best_score_ = {k: {m: m_v[-1] for m, m_v in v.items()} for k, v in scores.items()}
#         self.best_score_ = {k: {m: n if m != 'cappa' else -n for m, n in v.items()} for k, v in self.best_score_.items()}

        self.feature_importances_ = self.model.feature_importances_

    def predict(self, X_test):
        """Return the predicted class labels for ``X_test``."""
        return self.model.predict(X_test)

#     def predict_proba(self, X_test):
#         if self.model.objective == 'binary':
#             return self.model.predict_proba(X_test, ntree_limit=self.model.best_iteration)[:, 1]
#         else:
#             return self.model.predict_proba(X_test, ntree_limit=self.model.best_iteration)


  import pandas.util.testing as tm


#### SMOTE for resampling

In [None]:
# SMOTE for upsampling the training set

from imblearn.over_sampling import SMOTE
X_train, X_test, y_train, y_test = train_test_split( breast[breast.columns[0:8]], breast['Class'], test_size= 0.3, random_state = 42)
# Only the training split is resampled; the test split keeps its original
# class balance. `fit_sample` was removed from imbalanced-learn, so use
# `fit_resample`.
X_res, y_res = SMOTE(random_state = 42).fit_resample(X_train, y_train)

#### Normalization

In [None]:
# Scale every sample (row) to unit norm. The column names and the index
# are kept so the frames stay aligned with y_train / y_test and keep
# readable feature names.
X_train = pd.DataFrame(preprocessing.normalize(X_train), columns=X_train.columns, index=X_train.index)
X_test = pd.DataFrame(preprocessing.normalize(X_test), columns=X_test.columns, index=X_test.index)

#### Standardization


In [None]:
# standardization

ss = StandardScaler()
# Learn mean and variance from the training data only ...
train_X = ss.fit_transform(train_X)
# ... and reuse them for the test data: re-fitting on the test set would
# scale both sets differently and leak test statistics.
test_X = ss.transform(test_X)

#### Logistic Regression with sklearn

In [None]:
# Logistic Regression 

def LogReg(X_res, y_res, X_test, y_test):
    """
    Fit a logistic regression and report its performance on the test set.

    Prints the confusion matrix, the classification report and the ROC
    AUC, then plots the ROC curve. The AUC and the curve are computed
    from the predicted probability of the positive class (second column
    of ``predict_proba``), not from hard class labels.

    Params:
      - X_res, y_res: (resampled) training data
      - X_test, y_test: test data
    Returns:
      - the fitted LogisticRegression model
    """
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import classification_report, confusion_matrix
    from sklearn.metrics import roc_auc_score
    from sklearn.metrics import roc_curve

    logreg = LogisticRegression().fit(X_res, y_res)
    y_pred = logreg.predict(X_test)

    # predicted probability of the positive class
    y_pred_prob = logreg.predict_proba(X_test)[:, 1]

    print("confusion_matrix:\n", confusion_matrix(y_test, y_pred))
    print("Classification report:\n", classification_report(y_test, y_pred))
    # AUC needs scores; hard labels collapse the curve to a single point
    print("ROC:", roc_auc_score(y_test, y_pred_prob))

    # generate ROC curve
    fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)

    # plot ROC curve
    plt.plot([0, 1], [0, 1], 'k--')
    plt.plot(fpr, tpr)
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve')
    plt.show()
    return logreg


#### Logistic Regression with statsmodels.api

In [None]:
def logit_SM(X_train, y_train):
    """
    Fit a statsmodels Logit model and print its summary.

    Params:
      - X_train: design matrix, passed to the model as given
      - y_train: response; must expose ``.values`` (it is flattened first)
    Returns:
      - None; the ``summary2`` table is printed
    """
    import statsmodels.api as sm

    response = y_train.values.ravel()
    fitted = sm.Logit(response, X_train).fit()
    print(fitted.summary2())

#### Random Forest

In [None]:
# random Forest
def RF_pipe(X_res, y_res, X_test, y_test):
    """
    Fit a random forest classifier and report its performance on the test set.

    Prints the confusion matrix, the classification report and the ROC
    AUC. The AUC is computed from the predicted probability of the
    positive class (second column of ``predict_proba``), not from hard
    class labels.

    Params:
      - X_res, y_res: (resampled) training data
      - X_test, y_test: test data
    Returns:
      - the fitted RandomForestClassifier model
    """
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import classification_report, confusion_matrix
    from sklearn.metrics import roc_auc_score

    rf = RandomForestClassifier().fit(X_res, y_res)
    y_pred = rf.predict(X_test)

    # predicted probability of the positive class
    y_pred_prob = rf.predict_proba(X_test)[:, 1]

    print("confusion_matrix:\n", confusion_matrix(y_test, y_pred))
    print("Classification report:\n", classification_report(y_test, y_pred))
    # AUC needs scores; hard labels collapse the curve to a single point
    print("ROC:", roc_auc_score(y_test, y_pred_prob))
    return rf




# <font color = 'green'> Example 1: Conversion rate
The goal of this challenge is to build a model that predicts conversion rate and, based on the model, come up with ideas to improve it.

We have data about all users who hit our site: whether they converted or not as well as some of their characteristics such as their country, the marketing channel, their age, whether they are repeat users and the number of pages visited during that session (as a proxy for site activity/time spent on site).

Your project is to:

1.   Predict conversion rate
2.   Come up with recommendations for the product team and the marketing team to improve conversion rate
 


## Data Exploration with Seaborn
[Seaborn Gallery](https://seaborn.pydata.org/examples/index.html)

In [6]:
# Load the conversion dataset from a hard-coded path under "/content/drive"
# (presumably a mounted Google Drive -- adjust when running elsewhere).
data = pd.read_csv("/content/drive/My Drive/M- DataMarked/01- TakeHomeDataChallenges-master (20 题)/01.ConversionRate/conversion_data.csv")
# Preview the first rows of the dataset.
data.head()

Unnamed: 0,country,age,new_user,source,total_pages_visited,converted
0,UK,25,1,Ads,1,0
1,US,23,1,Seo,5,0
2,US,28,1,Seo,4,0
3,China,39,1,Seo,5,0
4,US,30,1,Seo,6,0
