In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats as st

### データ読み込み

In [None]:
# Load the training CSV and convert its `datetime` column to pandas datetimes.
train_kaggle = pd.read_csv('train_kaggle.csv')
train_kaggle['datetime'] = pd.to_datetime(train_kaggle['datetime'])
# Cell output: (earliest, latest) timestamp in the data.
(train_kaggle['datetime'].min(), train_kaggle['datetime'].max())

### 目的変数をドルに変換

In [None]:
# Divide operation_value by its smallest absolute value and truncate the
# quotient to an integer; the result is stored as the modelling target.
minimum_unit = train_kaggle['operation_value'].abs().min()
train_kaggle['target'] = train_kaggle['operation_value'].div(minimum_unit).astype(int)
train_kaggle.head()

In [None]:
minimum_unit

### ヒストグラム

In [None]:
# Print mean / min / max of the target, then draw a 30-bin histogram.
print(
    train_kaggle['target'].mean(),
    train_kaggle['target'].min(),
    train_kaggle['target'].max(),
)
train_kaggle['target'].hist(bins=30);

In [None]:
# Map every distinct atm_id to the rows of train_kaggle belonging to it,
# each re-indexed from zero.
by_atm = {
    atm_id: train_kaggle.loc[train_kaggle['atm_id'] == atm_id].reset_index(drop=True)
    for atm_id in train_kaggle['atm_id'].unique()
}

### ヒートマップ

In [None]:
# Calendar features derived from the timestamp.
train_kaggle['dayofweek'] = train_kaggle.datetime.dt.dayofweek
train_kaggle['day'] = train_kaggle.datetime.dt.day
train_kaggle['month'] = train_kaggle.datetime.dt.month
# `Series.dt.week` was deprecated in pandas 1.1 and removed in 2.0;
# `dt.isocalendar().week` is its replacement. The result is a nullable
# UInt32 column, so cast it to a plain integer dtype for later arithmetic.
train_kaggle['week'] = train_kaggle.datetime.dt.isocalendar().week.astype('int64')
# Mean target for every (day of month, day of week) combination.
sns.heatmap(train_kaggle.pivot_table(index='day', columns='dayofweek', values='target'));

### ATM別のデータ件数

In [None]:
# Number of distinct ATMs, followed by a bar chart of rows per ATM.
distinct_atm_ids = train_kaggle['atm_id'].unique()
print(len(distinct_atm_ids))
train_kaggle['atm_id'].value_counts().plot(kind='bar');

### 前処理

In [None]:
# Sum the target per (datetime, atm_id) together with the calendar columns,
# then flatten the group keys back into ordinary columns.
data_preprocessed = (
    train_kaggle
    .pivot_table(
        index=['datetime', 'atm_id', 'month', 'week', 'dayofweek', 'day'],
        values='target',
        aggfunc='sum',
    )
    .reset_index()
)

In [None]:
# Add the 2nd, 3rd and 4th powers of `month` and `week`.
# The columns are named '<col>*<power>' although the value is col ** power.
for base_col in ('month', 'week'):
    for power in (2, 3, 4):
        data_preprocessed[f'{base_col}*{power}'] = data_preprocessed[base_col] ** power
print(data_preprocessed.shape)
data_preprocessed.head()

In [None]:
# One-hot encode `dayofweek`; drop_first=True omits the indicator of the first category.
# NOTE(review): the cell replaces its own input, so running it twice fails because `dayofweek` is gone.
data_preprocessed = pd.get_dummies(data_preprocessed, columns=['dayofweek'], drop_first=True)

In [None]:
# Add payday indicator features (1 on the 10th / 25th of the month, else 0).
#
# The flags are derived from data_preprocessed's own `day` column. Taking them
# from train_kaggle['day'] would align on the row index, and the rows of the
# aggregated data_preprocessed frame do not correspond to the rows of the raw
# train_kaggle frame, so the flags would land on the wrong dates.
data_preprocessed['salary10th'] = (data_preprocessed['day'] == 10).astype(int)
data_preprocessed['salary25th'] = (data_preprocessed['day'] == 25).astype(int)
data_preprocessed.head()

In [None]:
# Read the Shift-JIS encoded daily data file and parse its `datetime` column.
dailydata = (
    pd.read_csv('daily_data.csv', encoding='shift_jis')
    .assign(datetime=lambda frame: pd.to_datetime(frame['datetime']))
)
dailydata

In [None]:
# Drop the separate year / month / day columns in one call.
dailydata = dailydata.drop(columns=['年', '月', '日'])
# Integer-typed copies of the temperature columns under English names.
dailydata['average_temp(℃)'] = dailydata['日平均気温(℃)'].astype(int)
dailydata['max_temp(℃)'] = dailydata['日最高気温(℃)'].astype(int)
dailydata['min_temp(℃)'] = dailydata['日最低気温(℃)'].astype(int)
# Missing precipitation becomes 0. The result is assigned back instead of
# calling fillna(inplace=True) on the column selection: that form is chained
# assignment and does not update `dailydata` under pandas Copy-on-Write.
dailydata['日降水量(mm)'] = dailydata['日降水量(mm)'].fillna(0)
dailydata['precipitation(mm)'] = dailydata['日降水量(mm)'].astype(int)
dailydata.dtypes

In [None]:
# Left-join the daily data onto the per-ATM rows by timestamp; rows without a
# matching date keep NaN in the joined columns.
data_preprocessed = pd.merge(data_preprocessed, dailydata, how='left', on='datetime')
data_preprocessed

In [None]:
# Target against the integer average temperature.
data_preprocessed.plot.scatter(x='average_temp(℃)', y='target')

In [None]:
# Target against the integer maximum temperature.
data_preprocessed.plot.scatter(x='max_temp(℃)', y='target')

In [None]:
# Target against the integer minimum temperature.
data_preprocessed.plot.scatter(x='min_temp(℃)', y='target')

In [None]:
# Target against the integer precipitation.
data_preprocessed.plot.scatter(x='precipitation(mm)', y='target')

In [None]:
# Every column of data_preprocessed that is not listed in exclude_cols is a
# model feature; the original column order is kept.
target_col = 'target'
exclude_cols = ['target', 'datetime', 'client_id', 'operation_value', 'atm_id', 'operation_type']
feature_cols = list(filter(lambda name: name not in exclude_cols, data_preprocessed.columns))
feature_cols

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize the feature columns and write the scaled values back in place.
# NOTE(review): the scaler is fitted on all rows, including those later used as test data.
scaler = StandardScaler()
X = scaler.fit_transform(data_preprocessed[feature_cols])
for col, scaled_values in zip(feature_cols, X.T):
    data_preprocessed[col] = scaled_values

In [None]:
data_preprocessed

In [None]:
# Per-ATM frames restricted to rows where '日平均気温(℃)' is present.
dataset_by_atm = {}
is_notnull = data_preprocessed['日平均気温(℃)'].notnull()
for atm_id in data_preprocessed['atm_id'].unique():
    is_target = data_preprocessed['atm_id'] == atm_id
    data_by_atm = data_preprocessed.loc[is_target & is_notnull].reset_index(drop=True)
    dataset_by_atm[atm_id] = data_by_atm

In [None]:
data_by_atm.columns

In [None]:
dataset_by_atm

In [None]:
from datetime import datetime as dt

# Boundary timestamp: the model loops treat rows at or after it as test data.
ratio_date = dt.strptime('2018-08-01 00:00:00', '%Y-%m-%d %H:%M:%S')
ratio_date

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

In [None]:
from  datetime import timedelta
# Start of the training window: 243 days before ratio_date
# (2017-12-01 when ratio_date is 2018-08-01).
train_start_date = ratio_date - timedelta(days=243)
train_start_date

### 線形回帰

In [None]:
# Linear regression: fit one model per ATM and score it on the test period.

score_by_atm = {}
predict_by_atm = {}
target_by_atm = {}
date_by_atm = {}
models = {}


for atm_id in dataset_by_atm.keys():

    target_data = dataset_by_atm[atm_id]
    # Test rows: datetime >= ratio_date.
    # Train rows: train_start_date <= datetime < ratio_date.
    is_test = target_data.datetime >= ratio_date
    test = target_data[is_test].sort_values(by='datetime')
    is_train_start = target_data.datetime >= train_start_date
    train = target_data[(~is_test) & (is_train_start)]

    # Report and skip an ATM that has no rows on one side of the split.
    # `continue` (not `break`) keeps the remaining ATMs from being dropped.
    if len(test) == 0 or len(train) == 0:
        print(atm_id)
        continue

    test_X = test[feature_cols]
    train_X = train[feature_cols]

    test_y = test[target_col]
    train_y = train[target_col]

    lr = LinearRegression()
    lr.fit(train_X, train_y)
    pred = lr.predict(test_X)
    # scikit-learn metric signature is (y_true, y_pred).
    mse = mean_squared_error(test_y, pred)
    rmse = np.sqrt(mse)

    # Coefficients in feature_cols order followed by the intercept.
    models[atm_id] = *lr.coef_, lr.intercept_
    score_by_atm[atm_id] = rmse
    predict_by_atm[atm_id] = pred
    target_by_atm[atm_id] = test_y
    date_by_atm[atm_id] = test.datetime

In [None]:
# Linear regression: RMSE per ATM as a bar chart, mean RMSE as cell output.
rmse_values = list(score_by_atm.values())
plt.bar(score_by_atm.keys(), rmse_values);
np.mean(rmse_values)

In [None]:
score_by_atm.values()

In [None]:
score_by_atm.keys()

In [None]:
# Linear regression: test-period actuals vs. predictions for ATM 74.
atm_id = 74
print(score_by_atm[atm_id])
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(date_by_atm[atm_id], target_by_atm[atm_id], label='test')
ax.plot(date_by_atm[atm_id], predict_by_atm[atm_id], label='pred')
ax.legend();

In [None]:
# Linear regression: test-period actuals vs. predictions for ATM 87.
atm_id = 87
print(score_by_atm[atm_id])
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(date_by_atm[atm_id], target_by_atm[atm_id], label='test')
ax.plot(date_by_atm[atm_id], predict_by_atm[atm_id], label='pred')
ax.legend();

### リッジ回帰

In [None]:
# Ridge regression: fit one model per ATM and score it on the test period.

score_by_atm = {}
predict_by_atm = {}
target_by_atm = {}
date_by_atm = {}
models = {}


for atm_id in dataset_by_atm.keys():

    target_data = dataset_by_atm[atm_id]
    # Test rows: datetime >= ratio_date.
    # Train rows: train_start_date <= datetime < ratio_date.
    is_test = target_data.datetime >= ratio_date
    test = target_data[is_test].sort_values(by='datetime')
    is_train_start = target_data.datetime >= train_start_date
    train = target_data[(~is_test) & (is_train_start)]

    # Report and skip an ATM that has no rows on one side of the split.
    # `continue` (not `break`) keeps the remaining ATMs from being dropped.
    if len(test) == 0 or len(train) == 0:
        print(atm_id)
        continue

    test_X = test[feature_cols]
    train_X = train[feature_cols]

    test_y = test[target_col]
    train_y = train[target_col]

    ridge = Ridge(alpha=5)
    ridge.fit(train_X, train_y)
    pred = ridge.predict(test_X)
    # scikit-learn metric signature is (y_true, y_pred).
    mse = mean_squared_error(test_y, pred)
    rmse = np.sqrt(mse)

    # Coefficients in feature_cols order followed by the intercept.
    models[atm_id] = *ridge.coef_, ridge.intercept_
    score_by_atm[atm_id] = rmse
    predict_by_atm[atm_id] = pred
    target_by_atm[atm_id] = test_y
    date_by_atm[atm_id] = test.datetime

In [None]:
# Regression coefficients per feature plus the intercept:
# one row per ATM, one column per entry of `columns`.
columns = (*feature_cols, 'intercept')
coef_matrix = pd.DataFrame(models, index=columns).transpose()
coef_matrix

In [None]:
# Ridge regression: RMSE per ATM as a bar chart, mean RMSE as cell output.
rmse_values = list(score_by_atm.values())
plt.bar(score_by_atm.keys(), rmse_values);
np.mean(rmse_values)

In [None]:
score_by_atm.values()

In [None]:
score_by_atm.keys()

In [None]:
# Ridge regression: test-period actuals vs. predictions for ATM 74.
atm_id = 74
print(score_by_atm[atm_id])
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(date_by_atm[atm_id], target_by_atm[atm_id], label='test')
ax.plot(date_by_atm[atm_id], predict_by_atm[atm_id], label='pred')
ax.legend();

In [None]:
# Test-period actuals vs. predictions for ATM 87, using the current result dicts.
atm_id = 87
print(score_by_atm[atm_id])
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(date_by_atm[atm_id], target_by_atm[atm_id], label='test')
ax.plot(date_by_atm[atm_id], predict_by_atm[atm_id], label='pred')
ax.legend()

In [None]:
# Full target history of one ATM with its test-period prediction overlaid.
target_atm = 87

sorted_data = data_preprocessed.sort_values(by='datetime')
is_target = sorted_data.atm_id == target_atm
date = sorted_data[is_target].datetime
target = sorted_data[is_target].target
plt.figure(figsize=(20, 10))
plt.plot(date, target);
# Index the result dicts with target_atm, not the global `atm_id` left over
# from an earlier cell, so the overlay always matches the plotted ATM.
plt.plot(date_by_atm[target_atm], predict_by_atm[target_atm]);

### ランダムフォレスト

In [None]:
# Random forest: fit one model per ATM and score it on the test period.

from sklearn.ensemble import RandomForestRegressor

score_by_atm = {}
predict_by_atm = {}
target_by_atm = {}
date_by_atm = {}
# `models` is deliberately not reset or written here. A random forest has no
# coef_/intercept_, and storing the stale `ridge` object's coefficients under
# every atm_id would overwrite the real per-ATM linear coefficients.


for atm_id in dataset_by_atm.keys():

    target_data = dataset_by_atm[atm_id]
    # Test rows: datetime >= ratio_date.
    # Train rows: train_start_date <= datetime < ratio_date.
    is_test = target_data.datetime >= ratio_date
    test = target_data[is_test].sort_values(by='datetime')
    is_train_start = target_data.datetime >= train_start_date
    train = target_data[(~is_test) & (is_train_start)]

    # Report and skip an ATM that has no rows on one side of the split.
    # `continue` (not `break`) keeps the remaining ATMs from being dropped.
    if len(test) == 0 or len(train) == 0:
        print(atm_id)
        continue

    test_X = test[feature_cols]
    train_X = train[feature_cols]

    test_y = test[target_col]
    train_y = train[target_col]

    rf = RandomForestRegressor(random_state=1234)
    rf.fit(train_X, train_y)
    pred = rf.predict(test_X)
    mse = mean_squared_error(test_y, pred)
    rmse = np.sqrt(mse)

    score_by_atm[atm_id] = rmse
    predict_by_atm[atm_id] = pred
    target_by_atm[atm_id] = test_y
    date_by_atm[atm_id] = test.datetime

In [None]:
# Random forest: RMSE per ATM as a bar chart, mean RMSE as cell output.
rmse_values = list(score_by_atm.values())
plt.bar(score_by_atm.keys(), rmse_values);
np.mean(rmse_values)

In [None]:
score_by_atm.values()

In [None]:
score_by_atm.keys()

In [None]:
# Random forest: test-period actuals vs. predictions for ATM 74.
atm_id = 74
print(score_by_atm[atm_id])
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(date_by_atm[atm_id], target_by_atm[atm_id], label='test')
ax.plot(date_by_atm[atm_id], predict_by_atm[atm_id], label='pred')
ax.legend();

In [None]:
# Random forest: test-period actuals vs. predictions for ATM 87.
atm_id = 87
print(score_by_atm[atm_id])
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(date_by_atm[atm_id], target_by_atm[atm_id], label='test')
ax.plot(date_by_atm[atm_id], predict_by_atm[atm_id], label='pred')
ax.legend();

### prophet

In [None]:
pip install fbprophet

In [None]:
import warnings
# Install a blanket 'ignore' filter: every warning raised after this cell is suppressed.
warnings.filterwarnings('ignore')

In [None]:
from fbprophet import Prophet

In [None]:
# Keep only timestamp, target and ATM id, renaming the first two to the
# `ds` / `y` column names used by the Prophet cells below.
data_preprocessed2 = (
    data_preprocessed[['datetime', 'target', 'atm_id']]
    .rename(columns={'datetime': 'ds', 'target': 'y'})
)
data_preprocessed2

In [None]:
# Column selections for the Prophet loop. Both are lists, so indexing a frame
# with them yields a DataFrame: `target_col_p` selects 'y', `feature_cols_p`
# selects 'ds' and 'y'.
target_col_p = ['y']
feature_cols_p = ['ds','y']
feature_cols_p

In [None]:
# Rebuild dataset_by_atm from the ds / y frame: one zero-indexed frame per atm_id.
dataset_by_atm = {}
for atm_id in data_preprocessed2['atm_id'].unique():
    is_target = data_preprocessed2['atm_id'] == atm_id
    data_by_atm = data_preprocessed2.loc[is_target].reset_index(drop=True)
    dataset_by_atm[atm_id] = data_by_atm

In [None]:
# Prophet: fit one model per ATM and score it on the test period.
score_by_atm = {}
predict_by_atm = {}
target_by_atm = {}
date_by_atm = {}
# `models` is deliberately not reset here: this loop never fills it, and the
# ATM grouping section further down reads the stored regression coefficients.


for atm_id in dataset_by_atm.keys():

    target_data = dataset_by_atm[atm_id]
    # Test rows: ds >= ratio_date. Train rows: train_start_date <= ds < ratio_date.
    is_test = target_data.ds >= ratio_date
    test = target_data[is_test].sort_values(by='ds')
    is_train_start = target_data.ds >= train_start_date
    train = target_data[(~is_test) & (is_train_start)]

    # Report and skip an ATM that has no rows on one side of the split.
    # `continue` (not `break`) keeps the remaining ATMs from being dropped.
    if len(test) == 0 or len(train) == 0:
        print(atm_id)
        continue

    test_X = test[feature_cols_p]
    train_X = train[feature_cols_p]

    test_y = test[target_col_p]
    train_y = train[target_col_p]


    m = Prophet()
    m.fit(train_X)

    # Only the point forecast column is kept.
    pred = m.predict(test_X)
    pred = pred['yhat']

    # scikit-learn metric signature is (y_true, y_pred).
    mse = mean_squared_error(test_y, pred)
    rmse = np.sqrt(mse)

    score_by_atm[atm_id] = rmse
    predict_by_atm[atm_id] = pred
    target_by_atm[atm_id] = test_y
    date_by_atm[atm_id] = test.ds

In [None]:
# RMSE per ATM as a bar chart, mean RMSE as cell output.
rmse_values = list(score_by_atm.values())
plt.bar(score_by_atm.keys(), rmse_values);
np.mean(rmse_values)

In [None]:
score_by_atm.values()

In [None]:
score_by_atm.keys()

In [None]:
# Test-period actuals vs. predictions for ATM 74, using the current result dicts.
atm_id = 74
print(score_by_atm[atm_id])
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(date_by_atm[atm_id], target_by_atm[atm_id], label='test')
ax.plot(date_by_atm[atm_id], predict_by_atm[atm_id], label='pred')
ax.legend();

In [None]:
# Test-period actuals vs. predictions for ATM 87, using the current result dicts.
atm_id = 87
print(score_by_atm[atm_id])
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(date_by_atm[atm_id], target_by_atm[atm_id], label='test')
ax.plot(date_by_atm[atm_id], predict_by_atm[atm_id], label='pred')
ax.legend();

### ATMグルーピング

In [None]:
# Clustering input: the per-ATM coefficient table.
# It is NOT rebuilt from `models` here. By this point in a top-to-bottom run
# `models` may have been reset or overwritten by the model sections that
# follow the ridge fit, which would yield an empty or meaningless table.
# The `coef_matrix` built right after the ridge-regression loop is reused.
columns = *feature_cols, 'intercept'
coef_matrix

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize the coefficient table column-wise; the result is a NumPy array.
scaler = StandardScaler()
coef_matrix2 = scaler.fit(coef_matrix).transform(coef_matrix)
coef_matrix2

In [None]:
# Column-wise z-score computed with pandas; this rebinds coef_matrix2 to a
# DataFrame that keeps the column labels.
coef_matrix2 = coef_matrix.sub(coef_matrix.mean()).div(coef_matrix.std())
coef_matrix2

In [None]:
# Normalize every column of the frame: centre on the column mean and divide
# by the column range (max - min).
column_range = coef_matrix.max() - coef_matrix.min()
coef_matrix3 = (coef_matrix - coef_matrix.mean()) / column_range
coef_matrix3

In [None]:
# KMeansで似た傾向のatm_idを可視化
%matplotlib notebook
import matplotlib as mpl
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
from matplotlib import cm
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples

In [None]:
# Alias (not a copy) of the standardized coefficient table used by the KMeans cells below.
data = coef_matrix2

In [None]:
# Initialise a seeded 3-cluster KMeans model, fit it on `data`, and label every row.
km = KMeans(n_clusters=3, random_state=1234)
cluster_label = km.fit(data).predict(data)

In [None]:
import copy

# Copy of `data` with the KMeans labels appended as a `cluster_label` column.
data_with_cluster_label = copy.copy(data).assign(cluster_label=cluster_label)

In [None]:
# Elbow plot: KMeans inertia for 1 to 14 clusters.
distortions = []
cluster_counts = range(1, 15)

for i in cluster_counts:
    # Fit the clustering for this cluster count and record its inertia.
    km = KMeans(n_clusters=i, random_state=1234).fit(data)
    distortions.append(km.inertia_)

plt.plot(cluster_counts, distortions, marker='o')
plt.xlabel('Number of clusters')
plt.ylabel('Distortion')
plt.show()

In [None]:
# Grouping pass 1: the standardized `week` and `day` coefficients.
# Take an explicit copy so the cluster-id column added in a later cell goes
# onto an independent frame rather than a selection of coef_matrix2.
coef_matrix_cluster1 = coef_matrix2[['week', 'day']].copy()
coef_matrix_cluster1

In [None]:
# Stack the two coefficient columns into a (2, n_atms) float array.
# `np.float` was removed in NumPy 1.24; the builtin `float` is the same dtype.
c_array = np.array([
    coef_matrix_cluster1['week'].tolist(),
    coef_matrix_cluster1['day'].tolist()
    ], float)
c_array

In [None]:
# Swap the axes so each row is one sample.
# NOTE(review): re-running this cell alone transposes the array back.
c_array = np.transpose(c_array)
print(c_array)

In [None]:
num_clusters = 3
# Seed the clustering (same seed as the other seeded KMeans fits in this
# notebook) so the cluster ids are reproducible across runs.
clf = KMeans(n_clusters=num_clusters, random_state=1234)
clf.fit(c_array)
pred = clf.predict(c_array)
pred

In [None]:
# Store the current `pred` array as the `atm_cluster_id` column and display the frame.
coef_matrix_cluster1['atm_cluster_id'] = pred
coef_matrix_cluster1

In [None]:
# Scatter of the `week` vs. `day` values in coef_matrix_cluster1, one colour per cluster id.
colors = ['Red', 'Blue', 'Green']
fig, ax = plt.subplots(figsize=(10, 5))

for t in range(num_clusters):
    in_cluster = coef_matrix_cluster1['atm_cluster_id'] == t
    x = coef_matrix_cluster1.loc[in_cluster, 'week']
    y = coef_matrix_cluster1.loc[in_cluster, 'day']
    ax.scatter(x, y, alpha=0.5, label=f'cluster {t}', color=colors[t])

ax.set(title='Scatter Plot', xlabel='week', ylabel='day')
ax.legend(loc='upper left', fontsize=12)

plt.show()

In [None]:
# Grouping pass 2: cluster on the standardized `week` and `intercept` coefficients.
# Explicit copy so the cluster-id column is added to an independent frame.
coef_matrix_cluster2 = coef_matrix2[['intercept', 'week']].copy()

# `np.float` was removed in NumPy 1.24; the builtin `float` is the same dtype.
c_array = np.array([
    coef_matrix_cluster2['week'].tolist(),
    coef_matrix_cluster2['intercept'].tolist()
    ], float)

# One row per sample.
c_array = c_array.T

num_clusters = 3
# Seeded so the cluster ids are reproducible across runs.
clf = KMeans(n_clusters=num_clusters, random_state=1234)
clf.fit(c_array)
pred = clf.predict(c_array)

coef_matrix_cluster2['atm_cluster_id'] = pred
coef_matrix_cluster2


In [None]:
# Scatter of the `week` vs. `intercept` values in coef_matrix_cluster2, one colour per cluster id.
colors = ['Red', 'Blue', 'Green']
fig, ax = plt.subplots(figsize=(10, 5))

for t in range(num_clusters):
    in_cluster = coef_matrix_cluster2['atm_cluster_id'] == t
    x = coef_matrix_cluster2.loc[in_cluster, 'week']
    y = coef_matrix_cluster2.loc[in_cluster, 'intercept']
    ax.scatter(x, y, alpha=0.5, label=f'cluster {t}', color=colors[t])

ax.set(title='Scatter Plot', xlabel='week', ylabel='intercept')
ax.legend(loc='upper left', fontsize=12)

plt.show()

In [None]:
# Grouping pass 3: cluster on the standardized `day` and `intercept` coefficients.
# Explicit copy so the cluster-id column is added to an independent frame.
coef_matrix_cluster3 = coef_matrix2[['intercept', 'day']].copy()

# `np.float` was removed in NumPy 1.24; the builtin `float` is the same dtype.
c_array = np.array([
    coef_matrix_cluster3['day'].tolist(),
    coef_matrix_cluster3['intercept'].tolist()
    ], float)

# One row per sample.
c_array = c_array.T

num_clusters = 3
# Seeded so the cluster ids are reproducible across runs.
clf = KMeans(n_clusters=num_clusters, random_state=1234)
clf.fit(c_array)
pred = clf.predict(c_array)

coef_matrix_cluster3['atm_cluster_id'] = pred
coef_matrix_cluster3



In [None]:
# Scatter of the `day` vs. `intercept` values in coef_matrix_cluster3, one colour per cluster id.
colors = ['Red', 'Blue', 'Green']
fig, ax = plt.subplots(figsize=(10, 5))

for t in range(num_clusters):
    in_cluster = coef_matrix_cluster3['atm_cluster_id'] == t
    x = coef_matrix_cluster3.loc[in_cluster, 'day']
    y = coef_matrix_cluster3.loc[in_cluster, 'intercept']
    ax.scatter(x, y, alpha=0.5, label=f'cluster {t}', color=colors[t])

ax.set(title='Scatter Plot', xlabel='day', ylabel='intercept')
ax.legend(loc='upper left', fontsize=12)

plt.show()

In [None]:
# Grouping pass 4: cluster on the standardized `intercept` and `salary25th` coefficients.
# Explicit copy so the cluster-id column is added to an independent frame.
coef_matrix_cluster4 = coef_matrix2[['intercept', 'salary25th']].copy()

# `np.float` was removed in NumPy 1.24; the builtin `float` is the same dtype.
c_array = np.array([
    coef_matrix_cluster4['intercept'].tolist(),
    coef_matrix_cluster4['salary25th'].tolist()
    ], float)

# One row per sample.
c_array = c_array.T

num_clusters = 3
# Seeded so the cluster ids are reproducible across runs.
clf = KMeans(n_clusters=num_clusters, random_state=1234)
clf.fit(c_array)
pred = clf.predict(c_array)

coef_matrix_cluster4['atm_cluster_id'] = pred
coef_matrix_cluster4




In [None]:
# Scatter of the `salary25th` vs. `intercept` values in coef_matrix_cluster4, one colour per cluster id.
colors = ['Red', 'Blue', 'Green']
fig, ax = plt.subplots(figsize=(10, 5))

for t in range(num_clusters):
    in_cluster = coef_matrix_cluster4['atm_cluster_id'] == t
    x = coef_matrix_cluster4.loc[in_cluster, 'salary25th']
    y = coef_matrix_cluster4.loc[in_cluster, 'intercept']
    ax.scatter(x, y, alpha=0.5, label=f'cluster {t}', color=colors[t])

ax.set(title='Scatter Plot', xlabel='salary25th', ylabel='intercept')
ax.legend(loc='upper left', fontsize=12)

plt.show()

In [None]:
# Grouping pass 5: cluster on the standardized `intercept` and '日降水量(mm)' coefficients.
# Explicit copy so the cluster-id column is added to an independent frame.
coef_matrix_cluster5 = coef_matrix2[['intercept', '日降水量(mm)']].copy()

# `np.float` was removed in NumPy 1.24; the builtin `float` is the same dtype.
c_array = np.array([
    coef_matrix_cluster5['intercept'].tolist(),
    coef_matrix_cluster5['日降水量(mm)'].tolist()
    ], float)

# One row per sample.
c_array = c_array.T

num_clusters = 3
# Seeded so the cluster ids are reproducible across runs.
clf = KMeans(n_clusters=num_clusters, random_state=1234)
clf.fit(c_array)
pred = clf.predict(c_array)

coef_matrix_cluster5['atm_cluster_id'] = pred
coef_matrix_cluster5



In [None]:
# Scatter of the '日降水量(mm)' vs. `intercept` values in coef_matrix_cluster5, one colour per cluster id.
colors = ['Red', 'Blue', 'Green']
fig, ax = plt.subplots(figsize=(10, 5))

for t in range(num_clusters):
    in_cluster = coef_matrix_cluster5['atm_cluster_id'] == t
    x = coef_matrix_cluster5.loc[in_cluster, '日降水量(mm)']
    y = coef_matrix_cluster5.loc[in_cluster, 'intercept']
    ax.scatter(x, y, alpha=0.5, label=f'cluster {t}', color=colors[t])

ax.set(title='Scatter Plot', xlabel='日降水量(mm)', ylabel='intercept')
ax.legend(loc='upper left', fontsize=12)

plt.show()

In [None]:
# Grouping pass 6: cluster on the standardized `intercept`, `day` and `week` coefficients.
# Explicit copy so the cluster-id column is added to an independent frame.
coef_matrix_cluster6 = coef_matrix2[['intercept', 'day', 'week']].copy()

# `np.float` was removed in NumPy 1.24; the builtin `float` is the same dtype.
c_array = np.array([
    coef_matrix_cluster6['intercept'].tolist(),
    coef_matrix_cluster6['day'].tolist(),
    coef_matrix_cluster6['week'].tolist()
    ], float)

# One row per sample.
c_array = c_array.T

num_clusters = 3
# Seeded so the cluster ids are reproducible across runs.
clf = KMeans(n_clusters=num_clusters, random_state=1234)
clf.fit(c_array)
pred = clf.predict(c_array)

coef_matrix_cluster6['atm_cluster_id'] = pred
coef_matrix_cluster6


In [None]:
# 3D scatter of the `week` / `intercept` / `day` values in coef_matrix_cluster6,
# one colour per cluster id.
fig = plt.figure(figsize=(10, 5))
ax = fig.add_subplot(111, projection='3d')
colors = ['Red', 'Blue', 'Green']

for t in range(num_clusters):
    in_cluster = coef_matrix_cluster6['atm_cluster_id'] == t
    x = coef_matrix_cluster6.loc[in_cluster, 'week']
    y = coef_matrix_cluster6.loc[in_cluster, 'intercept']
    z = coef_matrix_cluster6.loc[in_cluster, 'day']

    # Pass z explicitly; without it every point is drawn at the default z
    # position and the labelled `day` axis carries no data.
    ax.scatter(x, y, z, alpha=0.5, label='cluster ' + str(t), color=colors[t])

ax.set_title('Scatter Plot')
ax.set_xlabel('week')
ax.set_ylabel('intercept')
ax.set_zlabel('day')

ax.legend(loc='upper left', fontsize=12)

plt.show()