In [None]:
import folium
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
import seaborn as sns
from folium.plugins import HeatMap
from folium.plugins import HeatMapWithTime

#plt.style.use('fivethirtyeight');
#plt.rcParams['font.size'] = 14;
#plt.figure(figsize=(12,5));
palette = sns.color_palette('Paired', 10);

In [None]:
# Read the four input tables in one pass: GPS points, hourly hire statistics
# (train and test) and the zone bounding boxes.
train_1, train_2, test_df, zone_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/taxi-data/taxi_data/train_gps_points.csv',
        '../input/taxi-data/taxi_data/train_hire_stats.csv',
        '../input/taxi-data/taxi_data/test_hire_stats.csv',
        '../input/taxi-data/taxi_data/zones.csv',
    )
)
train_2.head()

In [None]:
# Distribution of the target variable.
hire_counts = train_2['Hire_count']
sns.displot(hire_counts)

In [None]:
# Bucket the hire counts into 15-wide bins and plot how many rows fall in each.
data = train_2.copy()
bin_edges = list(range(0, 151, 15))  # 0, 15, ..., 150 so the overflow bin really is 150+
binned = pd.cut(data['Hire_count'], bins=bin_edges, include_lowest=True)

# Uppermost bin: values above the last edge come back from pd.cut as NaN.
# They have to be labelled *before* the cast to str, because the cast turns
# NaN into the text 'nan', which a later replace(np.nan, ...) can never match.
data['Hire_count-bin'] = (
    binned.astype(object).where(binned.notna(), '[150+]').astype(str)
)

# include_lowest nudges the first edge to -0.001; show it as 0 in the label.
data['Hire_count-bin'] = data['Hire_count-bin'].str.replace('-0.001', '0', regex=False)

# Sort by hire count so the bins appear in ascending order on the x axis.
data = data.sort_values(by='Hire_count')
sns.catplot(x="Hire_count-bin", kind="count", palette=palette, data=data, height=5, aspect=3);
sns.despine()
plt.show()



In [None]:
## Look at the data distribution: residuals of hire count against hour slot.
# x and y are passed by keyword; seaborn >= 0.12 no longer accepts the data
# vectors positionally.
sns.jointplot(data=train_2, x='Hour_slot', y='Hire_count', kind='resid')

In [None]:
# 2-D histogram of hire count against hour slot, colour scale capped at 1200.
fig, ax = plt.subplots()
*_, density = ax.hist2d(train_2['Hour_slot'], train_2['Hire_count'],
                        bins=(50, 50), vmax=1200)
fig.colorbar(density, ax=ax)
ax.set_xlabel('Hour slot')
ax.set_ylabel('Hire Count')

In [None]:
# Parse the hire-stats dates and derive calendar fields from them.
time_df = pd.to_datetime(train_2['Date'], format='%Y-%m-%d')
train = pd.DataFrame({
    "year": time_df.dt.year,
    "month": time_df.dt.month,
    "day": time_df.dt.day,
    "weekday": time_df.dt.weekday,
})
train.head(200)

In [None]:
# Residual plot of hire count against weekday. x and y are passed by keyword
# because seaborn >= 0.12 rejects positional data vectors.
sns.jointplot(x=train['weekday'], y=train_2['Hire_count'], kind='resid')

In [None]:
# 2-D histogram of hire count against weekday, colour scale capped at 1000.
fig, ax = plt.subplots()
*_, density = ax.hist2d(train['weekday'], train_2['Hire_count'],
                        bins=(50, 50), vmax=1000)
fig.colorbar(density, ax=ax)
ax.set_xlabel('WeekDay')
ax.set_ylabel('Hire Count')

In [None]:
# Residual plot of hire count against month. x and y are passed by keyword
# because seaborn >= 0.12 rejects positional data vectors.
sns.jointplot(x=train['month'], y=train_2['Hire_count'], kind='resid')

In [None]:
# 2-D histogram of hire count against month, colour scale capped at 1000.
fig, ax = plt.subplots()
*_, density = ax.hist2d(train['month'], train_2['Hire_count'],
                        bins=(50, 50), vmax=1000)
fig.colorbar(density, ax=ax)
ax.set_xlabel('Month')
ax.set_ylabel('Hire Count')

In [None]:
def apply_month(x):
    """Return the month of a 'YYYY-MM-DD' string as an int (characters 5 and 6)."""
    month_digits = x[5] + x[6]
    return int(month_digits)


def apply_day(x):
    """Return the day of a 'YYYY-MM-DD' string as an int (characters 8 and 9)."""
    day_digits = x[8] + x[9]
    return int(day_digits)


# Attach weekday / month / day to the hire stats, then drop the raw date.
# NOTE: this cell consumes the 'Date' column, so it cannot be re-run without
# reloading train_2 first.
train_2['weekday'] = train['weekday']
date_strings = train_2['Date']
train_2['Month'] = date_strings.apply(apply_month)
train_2['Day'] = date_strings.apply(apply_day)
train_2 = train_2.drop(["Date"], axis=1)

feature_order = ['Zone_ID', 'Month', 'Day', 'weekday', 'Hour_slot', 'Hire_count']
train_df = train_2[feature_order]
train_df.head()

In [None]:
# Data preprocessing for the GPS points table.
import datetime

# Parse the GPS timestamps as timezone-aware (UTC) datetimes.
# NOTE(review): the explicit format uses dashes between hour, minute and second
# and is combined with infer_datetime_format=True. Confirm against the raw
# 'Datetime' strings that this format is the intended one.
time_df_gps=pd.to_datetime(train_1['Datetime'],utc=True ,infer_datetime_format=True, format='%Y-%m-%d %H-%M-%S +UTC')

# Calendar fields of every GPS fix. This rebinds the name `train`.
gps_clock = time_df_gps.dt
train = pd.DataFrame({
    "year": gps_clock.year,
    "month": gps_clock.month,
    "day": gps_clock.day,
    "weekday": gps_clock.weekday,
    "hour": gps_clock.hour,
})

In [None]:
# 2-D histogram of GPS fixes: hour of day on x, zone id on y.
# The axis labels follow the argument order of hist2d (x first, then y);
# they were previously swapped.
plt.hist2d(train['hour'], train_1['Zone_ID'], bins=(50, 50), vmax=2000)
plt.colorbar()
plt.xlabel('Hour')
plt.ylabel('Zone_ID')

In [None]:
## Look at the data distribution: hexbin of zone id against hour of day.
# x and y are passed by keyword because seaborn >= 0.12 rejects positional
# data vectors.
sns.jointplot(x=train['hour'], y=train_1['Zone_ID'], kind='hex')

In [None]:
# One row per GPS fix, reduced to hour, zone and date parts.
gps_extract_df = pd.DataFrame({
    'hour': train['hour'],
    'Zone_ID': train_1['Zone_ID'],
    'day': train['day'],
    'month': train['month'],
    'year': train['year'],
})
gps_extract_df

In [None]:
# Count how many GPS fixes share the same (hour, zone, day, month, year) key.
df = gps_extract_df
key_columns = df.columns.tolist()
group_sizes = df.groupby(key_columns).size()
dups = group_sizes.reset_index(name='count')
dups

In [None]:
## Look at the data distribution: hexbin of zone id against fixes per key.
# x and y are passed by keyword because seaborn >= 0.12 rejects positional
# data vectors.
sns.jointplot(data=dups, x='count', y='Zone_ID', kind='hex', xlim=(0, 250), ylim=(0, 25))

In [None]:
## Look at the data distribution: hexbin of fixes per key against hour of day.
# x and y are passed by keyword because seaborn >= 0.12 rejects positional
# data vectors.
sns.jointplot(data=dups, x='hour', y='count', kind='hex', ylim=(0, 200))

In [None]:
# Draw a random sample of at most 100000 GPS fixes to keep the heat map light.
# A fixed random_state makes the sample, and so the map, reproducible, and
# sampling directly avoids shuffling a full copy of the table first.
SAMPLE_SIZE = 100000
df = (
    train_1
    .sample(n=min(SAMPLE_SIZE, len(train_1)), random_state=42)
    .reset_index(drop=True)
)

# Report the coordinate extent of the sample before filtering.
for col in ["Longitude_X", "Latitude_Y"]:
    print(col, df[col].min(), df[col].max())

# Keep only fixes inside the longitude / latitude window of interest.
inside_window = (
    df["Longitude_X"].between(left=121.5, right=121.7)
    & df["Latitude_Y"].between(left=24.0, right=25.2)
)
df = df[inside_window]

center_location = [25.0838005, 121.590033]
m = folium.Map(location=center_location, control_scale=True, zoom_start=13)

In [None]:
# Heat layer: [lat, lon] pairs taken straight from the columns instead of a
# Python-level iterrows loop over every sampled row.
Heat_data = df[['Latitude_Y', 'Longitude_X']].values.tolist()
HeatMap(Heat_data,radius=18).add_to(m)

from folium.plugins import MarkerCluster
marker_cluster = MarkerCluster().add_to(m)

# One marker per zone, placed at the centre of its bounding box. The zone id
# is read from its own column so it keeps that column's dtype when formatted
# (iterrows yields each row as a single Series, which may upcast it).
zone_lat = (zone_df['top'] + zone_df['bottom']) / 2
zone_lon = (zone_df['left'] + zone_df['right']) / 2
for zone_id, lat, lon in zip(zone_df['Zone_ID'], zone_lat, zone_lon):
    information = 'Zone_ID: '+ str(zone_id)
    folium.Marker(location=[lat, lon],
                  popup=folium.Popup(information, max_width=500), max_width='100').add_to(marker_cluster)
m

In [None]:
# Total hires per zone, drawn as circles whose radius grows with log(total).
hire_stats = pd.read_csv('../input/taxi-data/taxi_data/train_hire_stats.csv')

# Sum only the hire counts; summing the whole frame would also "sum" the
# Date strings and Hour_slot values, which are meaningless here.
zone_totals = hire_stats.groupby('Zone_ID', as_index=False)['Hire_count'].sum()

# Zone centres keyed by Zone_ID, so the join does not depend on zone_df's row
# order matching Zone_ID - 1.
zone_centres = pd.DataFrame({
    'Zone_ID': zone_df['Zone_ID'],
    'Latitude_Y': (zone_df['top'] + zone_df['bottom']) / 2,
    'Longitude_X': (zone_df['left'] + zone_df['right']) / 2,
})
df = zone_totals.merge(zone_centres, on='Zone_ID', how='inner')

m2 = folium.Map(location=center_location, control_scale=True,zoom_start=13)
for zone in df.itertuples(index=False):
    folium.Circle(
        location=[zone.Latitude_Y, zone.Longitude_X],
        tooltip = "<h5 style='text-align:center;font-weight: bold'>"+str(int(zone.Zone_ID))+"</h5>",
        # The hire total is addressed by name rather than by column position.
        radius=(int((np.log(zone.Hire_count+1.00001)))+0.2)*100,
        color='#ff6600',
        fill_color='#ff8533',
        fill=True).add_to(m2)

m2

In [None]:
# Total hires per calendar day across all zones.
df = pd.read_csv('../input/taxi-data/taxi_data/train_hire_stats.csv')
# Parse the dates so the x axis is a real date axis; the '%m/%d' formatter
# below only produces meaningful labels on date values, not on 0..N integers.
df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
df = df.groupby(by=['Date'])[['Hire_count']].sum()

# Plot
f, ax = plt.subplots(figsize=(15, 10))

marker_style = dict(linewidth=2, linestyle='-', marker='o', markersize=5)
ax.plot(df.index, df['Hire_count'], color="red", **marker_style)
ax.tick_params(which='both', width=1, labelsize=12)
ax.tick_params(which='major', length=6)
ax.tick_params(which='minor', length=3, color='0.8')

# Grid
ax.grid(lw=1, ls='-', c="0.85", which='major')
ax.grid(lw=1, ls='-', c="0.95", which='minor')

# Axis labels (mdates comes from the notebook's import cell).
ax.xaxis.set_major_formatter(mdates.DateFormatter('%m/%d'))
ax.set_xlabel("Day", fontsize=18)
ax.set_ylabel("hire_count", fontsize=18)

ax.tick_params(labelsize=13)
plt.show()

In [None]:
# Total hires per month: group, then keep only the summed hire counts.
df = train_df.groupby(by=['Month'])[['Hire_count']].sum()

# Plot
f, ax = plt.subplots(figsize=(15, 10))

marker_style = {'linewidth': 2, 'linestyle': '-', 'marker': 'o', 'markersize': 5}
ax.plot(df.index, df, "-.", color="red", **marker_style)
for tick_kwargs in (
    dict(which='both', width=1, labelsize=12),
    dict(which='major', length=6),
    dict(which='minor', length=3, color='0.8'),
):
    ax.tick_params(**tick_kwargs)

# Grid
ax.grid(lw=1, ls='-', c="0.85", which='major')
ax.grid(lw=1, ls='-', c="0.95", which='minor')

# Axis labels
ax.set_xlabel("Month", fontsize=18)
ax.set_ylabel("hire_count", fontsize=18)
ax.tick_params(labelsize=13)
plt.show()

In [None]:
# Fresh, unmodified copy of the hourly hire statistics for feature engineering.
train_data = pd.read_csv(
    '../input/taxi-data/taxi_data/train_hire_stats.csv'
)
train_data

In [None]:
# Sine / cosine encoding of the hour-of-year (1..8760) with a one-year period.
df = pd.DataFrame(np.arange(1, 8760 + 1))
day = 365*24  # period of the cycle in hours (one non-leap year)
df_day = pd.DataFrame()
df_day['day_sin'] = np.sin(2*np.pi*df[0]/day)
df_day['day_cos'] = np.cos(2*np.pi*df[0]/day)
print(f'Hour sequence data shape{df_day.shape}')

plt.plot(np.array(df_day['day_sin']))
plt.plot(np.array(df_day['day_cos']))
plt.xlabel('Time [h]')
# The plotted cycle spans a whole year, so the title says "year" rather than
# the "day" it previously claimed.
plt.title('Time of year signal')

# Repeat the one-year signal 25 times, stacked end to end.
# NOTE(review): 25 presumably matches the number of zones - confirm.
day_df = pd.DataFrame()
day_df['day_sin'] = pd.concat([df_day['day_sin']]*25, ignore_index=True)
day_df['day_cos'] = pd.concat([df_day['day_cos']]*25, ignore_index=True)
print(day_df.head(10))
print(day_df.shape)

In [None]:
# Sine / cosine encoding of the hour of day (1..24) with a 24-hour period.
hour_numbers = [int(value) for value in np.linspace(1, 24, 24)]
df = pd.DataFrame(hour_numbers)
hours_day = 24
phase = 2*np.pi*df[0]/hours_day
df_hour = pd.DataFrame({'sin_time': np.sin(phase), 'cos_time': np.cos(phase)})
print(f'Hour sequence data shape{df_hour.shape}')

for signal_name in ('sin_time', 'cos_time'):
    plt.plot(np.array(df_hour[signal_name]))
plt.xlabel('Time [h]')
plt.title('Time of day signal')

# Repeat the one-day signal 9150 times, stacked end to end.
repeats = 9150
hour_df = pd.DataFrame()
hour_df['hour_sine'] = pd.concat([df_hour['sin_time']]*repeats, ignore_index=True)
hour_df['hour_cosine'] = pd.concat([df_hour['cos_time']]*repeats, ignore_index=True)
hour_df.head(10)

In [None]:
# Sine / cosine encoding over the 8760 hours of a year.
df = pd.DataFrame(np.arange(1, 8760 + 1))
print(df.shape)
# NOTE(review): despite the name, this period is 365*24 hours (one year), the
# same as the `day` period used for the yearly signal. Confirm whether a
# shorter, month-long period was intended.
month = 365*24
df['month_sine'] = np.sin(2*np.pi*df[0]/month)
df['month_cos'] = np.cos(2*np.pi*df[0]/month)

# Repeat the signal 25 times, stacked end to end.
# NOTE(review): the cosine column is stored under 'day_cosine', not
# 'month_cosine'; the name is kept so existing lookups keep working.
month_fre_df = pd.DataFrame()
month_fre_df['month_sine'] = pd.concat([df['month_sine']]*25, ignore_index=True)
month_fre_df['day_cosine'] = pd.concat([df['month_cos']]*25, ignore_index=True)
print(f'Convert Month_data into cycle {month_fre_df.shape}')

plt.plot(np.array(df['month_sine'][:8760]))
plt.plot(np.array(df['month_cos'][:8760]))
plt.xlabel('Time [h]')
# The plotted cycle spans a whole year, so the title says "year" rather than
# the "day" it previously claimed.
plt.title('Time of year signal')
print(df.head(10))

In [None]:
# Display the raw hire statistics loaded from train_hire_stats.csv.
train_data

In [None]:

# --- Calendar features for the training data ---

# Dates that count as non-working days in addition to Saturdays and Sundays.
# Every entry must be a quoted 'YYYY-MM-DD' string (the last entry used to be
# missing its opening quote, which made the whole cell a SyntaxError).
holiday = [
    '2016-02-06', '2016-02-07', '2016-02-08', '2016-02-09', '2016-02-10',
    '2016-02-11', '2016-02-12', '2016-02-13', '2016-02-14',
    '2016-02-27', '2016-02-28', '2016-02-29',
    '2016-04-02', '2016-04-03', '2016-04-04', '2016-04-05',
    '2016-06-09', '2016-06-10', '2016-06-11', '2016-06-12',
    '2016-09-15', '2016-09-16', '2016-09-17', '2016-09-18',
    '2016-10-09', '2016-10-10', '2016-10-11',
    '2016-12-31', '2017-01-01', '2017-01-02',
    '2017-01-27', '2017-01-28', '2017-01-29', '2017-01-30', '2017-01-31',
    '2017-02-01', '2017-02-25', '2017-02-26', '2017-02-27', '2017-02-28',
]

train_time = pd.to_datetime(train_data['Date'], format='%Y-%m-%d')
train_time_df = pd.DataFrame()
train_time_df["month"] = train_time.dt.month
train_time_df["day"] = train_time.dt.day
train_time_df["weekday"] = train_time.dt.weekday

# workday is 0 on Saturday (5), Sunday (6) and listed holidays, 1 otherwise.
# Computed on whole columns: the earlier per-row loop wrote through chained
# indexing (frame[col][i] = ...), which pandas does not guarantee to write
# back to the frame and which is very slow on a table this size.
is_weekend = train_time_df["weekday"].isin([5, 6])
is_holiday = train_time.dt.strftime('%Y-%m-%d').isin(holiday)
train_time_df["workday"] = (~(is_weekend | is_holiday)).astype(int)

# --- Model input frame: cyclic encodings plus zone id and target ---
train_df = pd.DataFrame()
train_df['weekday'] = train_time_df['weekday']
train_df['workday'] = train_time_df['workday']
# NOTE(review): 'month_cos' holds the sine and 'month_sin' the cosine. The
# test-set cell uses the same swapped naming, so it is kept here to stay
# consistent with it; fix both places together.
train_df['month_cos'] = np.sin(2*np.pi*train_time_df["month"]/12)
train_df['month_sin'] = np.cos(2*np.pi*train_time_df["month"]/12)
train_df['day_cos'] = np.cos(2*np.pi*train_time_df["day"]/31)
train_df['day_sin'] = np.sin(2*np.pi*train_time_df["day"]/31)
train_df['hour_cos'] = np.cos(2*np.pi*train_data["Hour_slot"]/24)
train_df['hour_sine'] = np.sin(2*np.pi*train_data["Hour_slot"]/24)
train_df['zone_id'] = train_data['Zone_ID']
train_df['Hire_count'] = train_data['Hire_count']

# --- Visual check of the three cyclic encodings ---
# Each panel is (title, first column, second column, number of leading rows).
panels = [
    ('Month_sine_cos', 'month_cos', 'month_sin', 8786),
    ('Day_sine_cos', 'day_cos', 'day_sin', 750),
    ('Hour_sine_cos', 'hour_cos', 'hour_sine', 24),
]
fig, axes = plt.subplots(len(panels), 1, figsize=(15, 12), facecolor='w', edgecolor='k')
fig.subplots_adjust(hspace=.5, wspace=.005)
for ax, (panel_title, first_col, second_col, n_rows) in zip(axes, panels):
    ax.plot(train_df[first_col].to_numpy()[:n_rows])
    ax.plot(train_df[second_col].to_numpy()[:n_rows])
    ax.set_title(panel_title)
# Show once, after all panels are drawn; calling plt.show() inside the loop
# displays the figure after the first panel only.
plt.show()

In [None]:
from sklearn.preprocessing import minmax_scale
import pandas as pd

## Normalize data: rescale target, zone id and weekday to the [0, 1] range.
scaled_columns = ['Hire_count', 'zone_id', 'weekday']
train_df[scaled_columns] = minmax_scale(train_df[scaled_columns], feature_range=(0, 1))

In [None]:
# Display the feature frame after min-max scaling.
train_df

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler

# Label-encode the hire counts: each distinct value becomes an integer code.
encoder = LabelEncoder()
encoded_counts = encoder.fit_transform(train_df['Hire_count'].values)
train_y = pd.DataFrame({'Hire_count': encoded_counts}).astype('float32')

# Squash the codes into [-1, 1]; the scaler expects a 2-D input such as a DataFrame.
scaler = MinMaxScaler(feature_range=(-1, 1))
train_y_scale = scaler.fit_transform(train_y)
print(np.max(train_y))
print(np.max(train_y_scale))

# Round-trip check: invert the scaling back to the encoded values.
true_value = scaler.inverse_transform(train_y_scale)
print(np.max(true_value))

In [None]:
import sklearn

# Chronological 80/20 split: the first 80% of rows train, the rest validate.
# Rows are deliberately left unshuffled.
n = len(train_df)
split_at = int(n*0.8)
training_df = train_df.iloc[:split_at]
val_df = train_df.iloc[split_at:]
for part in (training_df, val_df):
    print(len(part))

In [None]:
# Display the training portion of the split.
training_df

In [None]:
# Sorted distinct (scaled) zone ids present in the training split.
# This rebinds the name `train`.
scaled_zone_ids = training_df["zone_id"]
train = np.unique(scaled_zone_ids)
train

In [None]:
test_data = pd.read_csv('../input/taxi-data/taxi_data/test_hire_stats.csv')

time_df = pd.DataFrame()

# Dates in the test period that count as non-working days in addition to
# Saturdays and Sundays.
holiday = [
    '2017-02-01', '2017-02-25', '2017-02-26', '2017-02-27', '2017-02-28'
]

# --- Calendar features for the test data ---
test_time = pd.to_datetime(test_data['Date'], format='%Y-%m-%d')
test_time_df = pd.DataFrame()
test_time_df["month"] = test_time.dt.month
test_time_df["day"] = test_time.dt.day
test_time_df["weekday"] = test_time.dt.weekday

# workday is 0 on Saturday (5), Sunday (6) and listed holidays, 1 otherwise.
# The holiday test is applied to each row's own date. The earlier loop always
# checked the first row's date (iloc[0]), so every row got the same holiday
# verdict, and it wrote through chained indexing.
is_weekend = test_time_df["weekday"].isin([5, 6])
is_holiday = test_time.dt.strftime('%Y-%m-%d').isin(holiday)
test_time_df["workday"] = (~(is_weekend | is_holiday)).astype(int)

# --- Model input frame, same columns and order as the training frame ---
test_df = pd.DataFrame()
test_df['weekday'] = test_time_df['weekday']
test_df['workday'] = test_time_df['workday']
# NOTE(review): 'month_cos' holds the sine and 'month_sin' the cosine. The
# training cell uses the same swapped naming, so it is kept here to stay
# consistent with it; fix both places together.
test_df['month_cos'] = np.sin(2*np.pi*test_time_df["month"]/12)
test_df['month_sin'] = np.cos(2*np.pi*test_time_df["month"]/12)
test_df['day_cos'] = np.cos(2*np.pi*test_time_df["day"]/31)
test_df['day_sin'] = np.sin(2*np.pi*test_time_df["day"]/31)
test_df['hour_cos'] = np.cos(2*np.pi*test_data["Hour_slot"]/24)
test_df['hour_sine'] = np.sin(2*np.pi*test_data["Hour_slot"]/24)
test_df['zone_id'] = test_data['Zone_ID']
test_df['Hire_count'] = test_data['Hire_count']
test_df

In [None]:
# Build the lookup from raw zone id (1..25) to its min-max scaled value.
zone_id_int = [int(value) for value in np.linspace(1, 25, 25)]
print(zone_id_int)

## Normalize the ids to [0, 1]
zone_id = minmax_scale(zone_id_int, feature_range=(0, 1))

## Pair every raw id with its scaled counterpart
dict_val = zip(zone_id_int, zone_id)
list_val = list(dict_val)

In [None]:
## Convert the test zone ids to the same scale as the lookup table.
# A dict lookup replaces the nested loop over every (row, table entry) pair.
zone_scale = dict(list_val)
test_zone = test_df['zone_id']
test_zone_id = test_zone.map(zone_scale)

# Fail loudly on ids that are not in the table (for example when this cell is
# re-run on already-scaled values) instead of producing a wrong-length array.
if test_zone_id.isna().any():
    unknown_ids = sorted(test_zone[test_zone_id.isna()].unique().tolist())
    raise ValueError(f'zone ids missing from the scale table: {unknown_ids}')

test_zone_id = test_zone_id.to_numpy()
test_df['zone_id'] = test_zone_id

In [None]:
from sklearn.preprocessing import minmax_scale
import pandas as pd

## Normalize data
# Scale the weekday by its fixed 0..6 range instead of by the min/max found in
# the test set, so the result does not depend on which weekdays the test
# period contains. Reading from test_time_df (the unscaled source) also keeps
# this cell safe to re-run.
# NOTE(review): this matches the training scaling as long as the training
# data contains both Monday (0) and Sunday (6) - confirm.
test_df['weekday'] = test_time_df['weekday'] / 6.0

In [None]:
# Display the test feature frame after zone-id and weekday scaling.
test_df

In [None]:
class WindowGenerator():
    """Describes how a time series is cut into (input, label) windows.

    A window covers ``input_width + shift`` consecutive rows. The first
    ``input_width`` rows are the model inputs and the last ``label_width``
    rows are the labels.

    Args:
        input_width: Number of time steps in the input part of a window.
        label_width: Number of time steps in the label part of a window.
        shift: Offset added to ``input_width`` to get the total window size.
        train_df: Training frame; its column order defines ``column_indices``.
        val_df: Validation frame.
        label_columns: Name of the label column, a sequence of such names, or
            None to keep every column as a label.
    """

    def __init__(self, input_width, label_width, shift,
               train_df=train_df, val_df=val_df, 
               label_columns=None):
        # Store the raw data.
        self.train_df = train_df
        self.val_df = val_df

        # Work out the label column indices. self.label_columns keeps the
        # value exactly as passed; only the index map is built from a
        # normalised list, because enumerating a bare string would index its
        # characters ('H', 'i', ...) instead of the column name.
        self.label_columns = label_columns
        if label_columns is not None:
            if isinstance(label_columns, str):
                label_names = [label_columns]
            else:
                label_names = list(label_columns)
            self.label_columns_indices = {name: i for i, name in
                                        enumerate(label_names)}
        self.column_indices = {name: i for i, name in
                               enumerate(train_df.columns)}

        # Work out the window parameters.
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift

        self.total_window_size = input_width + shift

        # Inputs are the first input_width positions of the window.
        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        # Labels are the last label_width positions of the window.
        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

    def __repr__(self):
        """Summarise the window size, index ranges and label column(s)."""
        return '\n'.join([
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Label indices: {self.label_indices}',
            f'Label column name(s): {self.label_columns}'])

In [None]:
# Seven input steps predicting the single next step of 'Hire_count'.
w1 = WindowGenerator(
    input_width=7,
    label_width=1,
    shift=1,
    train_df=training_df,
    val_df=val_df,
    label_columns='Hire_count',
)
w1

In [None]:
## Split a batch of full windows into the input part and the label part.
def split_window(self, features): 
    """Split stacked windows into (inputs, labels).

    Args:
        features: Array-like indexed as (batch, time, features), with the time
            axis covering one full window.

    Returns:
        A tuple ``(inputs, labels)``. ``inputs`` keeps every feature over
        ``self.input_slice``. ``labels`` covers ``self.labels_slice`` and is
        restricted to ``self.label_columns`` when that is not None.
    """
    inputs = features[:, self.input_slice, :]
    labels = features[:, self.labels_slice, :]
    if self.label_columns is not None:
        # Accept one column name as well as a sequence of names; a bare
        # string must not be iterated character by character.
        if isinstance(self.label_columns, str):
            label_names = [self.label_columns]
        else:
            label_names = list(self.label_columns)
        labels = tf.stack(
            [labels[:, :, self.column_indices[name]] for name in label_names],
            axis=-1)
    # Slicing doesn't preserve static shape information, so set the shapes
    # manually. This way the `tf.data.Datasets` are easier to inspect
    inputs.set_shape([None, self.input_width, None])
    labels.set_shape([None, self.label_width, None])

    return inputs, labels

import tensorflow as tf

## Extend the class after the fact by attaching the function as a method.
WindowGenerator.split_window = split_window

# Stack three slices of train_df, each the length of one full window.
window_length = w1.total_window_size
example_window = tf.stack([
    np.array(train_df[start:start + window_length])
    for start in (0, 100, 200)
])

## Example input and output.
example_inputs, example_labels = w1.split_window(example_window)
print('All shapes are: (batch, time, features)')
for caption, tensor in (
    ('Window shape', example_window),
    ('Inputs shape', example_inputs),
    ('labels shape', example_labels),
):
    print(f'{caption}: {tensor.shape}')

In [None]:
def plot(self, model=None, plot_col='Hire_count', max_subplots=3):
    """Plot inputs, labels and optional predictions for a few example windows.

    Args:
        model: Optional object with a ``predict(inputs)`` method; its output
            is indexed as (batch, time, label column).
        plot_col: Name of the column to draw.
        max_subplots: Upper bound on the number of windows shown.
    """
    inputs, labels = self.example
    print(inputs.shape)
    plt.figure(figsize=(12, 8))
    plot_col_index = self.column_indices[plot_col]
    max_n = min(max_subplots, len(inputs))

    # Position of plot_col inside the label tensor. A single column name
    # passed as a string is treated as a one-element list.
    if self.label_columns:
        if isinstance(self.label_columns, str):
            label_names = [self.label_columns]
        else:
            label_names = list(self.label_columns)
        label_col_index = label_names.index(plot_col) if plot_col in label_names else None
    else:
        label_col_index = plot_col_index

    # Predict once for the whole batch rather than once per subplot.
    predictions = None
    if model is not None and label_col_index is not None:
        predictions = model.predict(inputs)

    for n in range(max_n):
        plt.subplot(max_n, 1, n+1)
        plt.ylabel(f'{plot_col} [normed]')
        plt.plot(self.input_indices, inputs[n, :, plot_col_index],
                 label='Inputs', marker='.', zorder=-10)

        # plot_col is not one of the labels: only the inputs can be drawn.
        if label_col_index is None:
            continue

        plt.scatter(self.label_indices, labels[n, :, label_col_index],
                    edgecolors='k', label='Labels', c='#2ca02c', s=64)

        if predictions is not None:
            plt.scatter(self.label_indices, predictions[n, :, label_col_index],
                      marker='X', edgecolors='k', label='Predictions',
                      c='#ff7f0e', s=64)

        if n == 0:
            plt.legend()

    plt.xlabel('Time [h]')

WindowGenerator.plot = plot

In [None]:
## Store the hand-built example batch on w1 so that plot() can read it from
## self.example.
# NOTE: a later cell installs a read-only `example` property on
# WindowGenerator; once that cell has run, this assignment raises
# AttributeError, so this cell only works before it.
w1.example = example_inputs, example_labels

In [None]:
def plot(self, model=None, plot_col='Hire_count', max_suplots=2):
    """Plot inputs, labels and optional predictions for a few example windows.

    Args:
        model: Optional object with a ``predict(inputs)`` method that returns
            one array indexed as (batch, time, label column).
        plot_col: Name of the column to draw.
        max_suplots: Upper bound on the number of windows shown (the spelling
            is kept so existing keyword calls keep working).
    """
    inputs, labels = self.example

    plt.figure(figsize=(12, 8))
    plot_col_index = self.column_indices[plot_col]
    max_n = min(max_suplots, len(inputs))

    # Position of plot_col inside the label tensor. A single column name
    # passed as a string is treated as a one-element list.
    if self.label_columns:
        if isinstance(self.label_columns, str):
            label_names = [self.label_columns]
        else:
            label_names = list(self.label_columns)
        label_col_index = label_names.index(plot_col) if plot_col in label_names else None
    else:
        label_col_index = plot_col_index

    # predict() returns a single array, so it is not tuple-unpacked. It is
    # called once for the whole batch rather than once per subplot. The
    # earlier debug prints of the undefined names `Predictions` and `loss`
    # are removed.
    predictions = None
    if model is not None and label_col_index is not None:
        predictions = model.predict(inputs)

    for n in range(max_n):
        plt.subplot(max_n, 1, n+1)
        plt.ylabel(f'{plot_col}: [Norm]')

        plt.plot(self.input_indices, inputs[n, :, plot_col_index],
                 label='Inputs', marker=".", zorder=-10)

        # plot_col is not one of the labels: only the inputs can be drawn.
        if label_col_index is None:
            continue

        plt.scatter(self.label_indices, labels[n, :, label_col_index],
                    edgecolors='k', label='Labels', c='#2ca02c', s=64)
        if predictions is not None:
            plt.scatter(self.label_indices, predictions[n, :, label_col_index],
                        marker='X', edgecolors='k', label='Predictions',
                        c='#ff7f0e', s=64)

        if n == 0:
            plt.legend()

    plt.xlabel('Time [h]')

WindowGenerator.plot=plot

In [None]:
# Plot the example batch that was assigned to w1.example by hand.
w1.plot()

In [None]:
def make_dataset(self, data, batch_size=32, shuffle=True):
    """Turn a frame or array into a batched dataset of (inputs, labels) windows.

    Args:
        data: Array-like of shape (time, features); it is converted to float32.
        batch_size: Number of windows per batch (default 32, as before).
        shuffle: Whether the windows are shuffled (default True, as before).

    Returns:
        The windowed dataset with ``self.split_window`` mapped over it.
    """
    # Convert data to a NumPy array.
    data = np.array(data, dtype=np.float32)

    ds = tf.keras.preprocessing.timeseries_dataset_from_array(
        data=data,
        targets=None,
        sequence_length=self.total_window_size,
        sequence_stride=1,
        shuffle=shuffle,
        batch_size=batch_size)
    ds = ds.map(self.split_window)
    return ds

WindowGenerator.make_dataset = make_dataset


# Read-only properties giving access to the training / validation datasets.
# Each access builds a new dataset from the stored frame.
@property
def train(self):
    """Dataset built from ``self.train_df``."""
    return self.make_dataset(self.train_df)


@property
def val(self):
    """Dataset built from ``self.val_df``."""
    return self.make_dataset(self.val_df)


@property
def example(self):
    """Get and cache an example batch of (inputs, labels) for plotting."""
    result = getattr(self, '_example', None)
    if result is None:
        # No example batch was found, so take the first one from the train dataset
        result = next(iter(self.train))
        # Cache it for the next time
        self._example = result
    return result

WindowGenerator.train = train
WindowGenerator.val = val
WindowGenerator.example = example

In [None]:
# Plot again; self.example is now served by the `example` property, which
# takes (and caches) a batch from the training dataset.
w1.plot()

In [None]:
# 1. Inspect the structure of the elements the training dataset yields.
print(w1.train.element_spec)

# 2. Pull one concrete batch and report its shapes.
for example_inputs, example_labels in w1.train.take(1):
    shape_report = (
        f'Inputs shape (batch, time, features): {example_inputs.shape}',
        f'Labels shape(batch, time, features): {example_labels.shape}',
    )
    for line in shape_report:
        print(line)

In [None]:
def compile_and_fit(model, window, patience=20, epochs=None):
    """Compile `model` with MSE loss and Adam, then train it on `window`.

    Args:
        model: Keras model to compile and fit.
        window: Object exposing ``train`` and ``val`` datasets.
        patience: Epochs without ``val_loss`` improvement before early stopping.
        epochs: Maximum number of epochs; defaults to the notebook-level
            ``EPOCHS`` constant when omitted.

    Returns:
        The history object returned by ``model.fit``.
    """
    if epochs is None:
        epochs = EPOCHS
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                      patience=patience, mode='min')
    model.compile(loss=tf.losses.MeanSquaredError(),
                  optimizer=tf.optimizers.Adam(),
                  metrics=[tf.metrics.MeanAbsoluteError()])
    # Model.fit accepts tf.data datasets directly; fit_generator is deprecated.
    history = model.fit(window.train, epochs=epochs,
                        validation_data=window.val, callbacks=[early_stopping])
    return history

In [None]:
import tensorflow as tf

# Small fully connected network applied independently at every time step.
# NOTE: the variable is named `Dense`, like the Keras layer class; the layers
# below are therefore always referenced as tf.keras.layers.Dense.
Dense = tf.keras.Sequential([
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=1)
])

# 24 input steps and 24 label steps, with the labels shifted one step ahead.
multiple_steps_window = WindowGenerator(
    input_width=24, label_width=24, shift=1,
    train_df=training_df, val_df=val_df, 
                     label_columns='Hire_count')

print('Input shape', multiple_steps_window.example[0].shape)
print('Output shape', Dense(multiple_steps_window.example[0]).shape)

EPOCHS=50


def compile_and_fit(model, window, patience=20, epochs=None):
    """Compile `model` with MSE loss and Adam, then train it on `window`.

    Args:
        model: Keras model to compile and fit.
        window: Object exposing ``train`` and ``val`` datasets.
        patience: Epochs without ``val_loss`` improvement before early stopping.
        epochs: Maximum number of epochs; defaults to ``EPOCHS`` when omitted.

    Returns:
        A tuple ``(history, model)``.
    """
    if epochs is None:
        epochs = EPOCHS
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                      patience=patience, mode='min')
    model.compile(loss=tf.losses.MeanSquaredError(),
                  optimizer=tf.optimizers.Adam(),
                  metrics=[tf.metrics.MeanAbsoluteError()])
    # Model.fit accepts tf.data datasets directly; fit_generator is deprecated.
    history = model.fit(window.train, epochs=epochs,
                        validation_data=window.val, callbacks=[early_stopping])
    return history, model


val_performance = {}
history, model= compile_and_fit(Dense, multiple_steps_window)
val_performance['Dense_multiple_steps'] = Dense.evaluate(multiple_steps_window.val)

In [None]:
# Persist the trained weights, then plot the model's predictions.
weights_path = 'Dense_multiple_steps_ouputs.h5'
Dense.save_weights(weights_path)
multiple_steps_window.plot(model=Dense)