<a href="https://www.kaggle.com/luckymouse0/simple-series-analysis?scriptVersionId=87298444" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
import numpy as np # linear algebra
import matplotlib.pyplot as plt
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
# Walk the Kaggle input directory top-down and print the full path of every
# file found, one path per line.
input_paths = (
    os.path.join(folder, entry)
    for folder, _subdirs, entries in os.walk('/kaggle/input')
    for entry in entries
)
for input_path in input_paths:
    print(input_path)

In [None]:
# Load the ATM withdrawals CSV into a DataFrame
dataset = pd.read_csv('/kaggle/input/atm-data-m2/atm_data_m2.csv')

# The target is the withdrawn amount; the predictors are every remaining
# column except the target itself and the 'Unnamed: 0' column.
y = dataset['total_amount_withdrawn']
X = dataset.drop(columns=['total_amount_withdrawn', 'Unnamed: 0'])

# Preview the first rows of the predictors
print(X.head())

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation; the fixed seed keeps the split
# reproducible between runs.
# NOTE(review): train_test_split shuffles rows by default - if the rows are
# time-ordered this mixes future and past observations; confirm this is intended.
splits = train_test_split(
    X,
    y,
    train_size=0.8,
    test_size=0.2,
    random_state=0,
)
X_train, X_valid, y_train, y_valid = splits

# Summarise the training predictors (column dtypes and non-null counts)
X_train.info()

In [None]:
from sklearn.preprocessing import OrdinalEncoder

# Get the columns with categorical data (columns whose dtype is 'object')
s = (X_train.dtypes == 'object')
object_cols = list(s[s].index)

# Make copy to avoid changing original data
label_X_train = X_train.copy()
label_X_valid = X_valid.copy()

# Apply ordinal encoder to each column with categorical data.
# The encoder is fitted on the training split only, so a category that occurs
# solely in the validation split would make transform() raise ValueError with
# the default settings. Map such unseen categories to the sentinel code -1
# instead of crashing.
ordinal_encoder = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
if object_cols:  # skip encoding when there are no object-dtype columns
    label_X_train[object_cols] = ordinal_encoder.fit_transform(X_train[object_cols])
    label_X_valid[object_cols] = ordinal_encoder.transform(X_valid[object_cols])

print(label_X_train.head())

In [None]:
# Using Random Forest Regressor

from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

def random_forest_regressor(X_train, X_valid, y_train, y_valid, n_estimat):
    """Fit a random forest on the training data and score it on the validation data.

    Args:
        X_train: Training features.
        X_valid: Validation features.
        y_train: Training targets.
        y_valid: Validation targets.
        n_estimat: Number of trees, passed as ``n_estimators``.

    Returns:
        Mean absolute error of the forest's predictions on ``X_valid``.
    """
    # random_state is fixed so repeated runs build the same forest
    forest = RandomForestRegressor(n_estimators=n_estimat, random_state=0)
    forest.fit(X_train, y_train)
    return mean_absolute_error(y_valid, forest.predict(X_valid))

# Report the validation MAE of a 100-tree random forest on the encoded data
print("MAE from Random Forest Regressor:")
rf_mae = random_forest_regressor(
    label_X_train, label_X_valid, y_train, y_valid, n_estimat=100
)
print(rf_mae)

In [None]:
# Using XGBoost

from xgboost import XGBRegressor

def xgb_regressor(X_train, X_valid, y_train, y_valid, n_estimat=1000, l_rate=0.01, early=5):
    """Train an XGBoost regressor with early stopping and return its validation MAE.

    Args:
        X_train: Training features.
        X_valid: Validation features; also used as the early-stopping eval set.
        y_train: Training targets.
        y_valid: Validation targets.
        n_estimat: Passed as ``n_estimators`` (upper bound on boosting rounds).
        l_rate: Passed as ``learning_rate``.
        early: Passed as ``early_stopping_rounds``.

    Returns:
        Mean absolute error of the model's predictions on ``X_valid``.

    Note:
        The same validation split drives early stopping and produces the
        reported MAE, so the score is likely to be somewhat optimistic.
    """
    import inspect  # local import: only needed for the version check below

    model_kwargs = {
        "random_state": 0,
        "n_estimators": n_estimat,
        "learning_rate": l_rate,
    }
    fit_kwargs = {"eval_set": [(X_valid, y_valid)], "verbose": False}

    # XGBoost deprecated ``early_stopping_rounds`` as a ``fit()`` argument in
    # 1.6 and removed it in 2.0, where passing it raises TypeError. Older
    # releases only honour it in ``fit()``. Pick the location the installed
    # version supports so the function works on both.
    if "early_stopping_rounds" in inspect.signature(XGBRegressor.fit).parameters:
        fit_kwargs["early_stopping_rounds"] = early
    else:
        model_kwargs["early_stopping_rounds"] = early

    model = XGBRegressor(**model_kwargs)
    model.fit(X_train, y_train, **fit_kwargs)
    preds = model.predict(X_valid)
    return mean_absolute_error(y_valid, preds)

# Report the validation MAE of XGBoost with up to 5000 rounds at a 0.001
# learning rate (early-stopping patience left at the function default)
print("MAE from XGBoost:")
xgb_mae = xgb_regressor(
    label_X_train, label_X_valid, y_train, y_valid, n_estimat=5000, l_rate=0.001
)
print(xgb_mae)