In [1]:
import sys
# Make the packages installed in the `feature_store` conda environment importable
# from this kernel.
# NOTE(review): absolute, machine-specific path; consider moving it to configuration.
feature_store_env = '/home/ec2-user/SageMaker/custom-miniconda/miniconda/envs/feature_store/lib/python3.8/site-packages'
# Guarded so that re-running this cell does not stack duplicate entries on sys.path.
if feature_store_env not in sys.path:
    sys.path.append(feature_store_env)

In [3]:
import dateutil.tz
import datetime as dt
import json
import awswrangler as wr
from feature_store import feature_store
from feature_store.feature_table import feature_table
from feature_store.value_type import ValueType
import feature_store.config as config
from io import StringIO
import urllib3
import logging
import sys
from json import dumps
import time
import pandas as pd

def get_ymd(datetime):
    """Split a date-like object into year, zero-padded month and zero-padded day.

    Args:
        datetime: any object exposing integer ``year``, ``month`` and ``day``
            attributes (e.g. ``datetime.date`` / ``datetime.datetime``).

    Returns:
        tuple: ``(year, month, day)`` where ``year`` is the original integer and
        ``month`` / ``day`` are always two-character strings (``'01'`` .. ``'12'``,
        ``'01'`` .. ``'31'``).

    The previous implementation returned ``str`` for values below 10 but ``int``
    for values of 10 and above, so the element types depended on the date.
    """
    return datetime.year, f"{datetime.month:02d}", f"{datetime.day:02d}"

def first_day_next_month(date):
    """Return ``date`` moved to the first day of the following month.

    Any time-of-day carried by ``date`` is kept, since only the day field is replaced.
    """
    month_start = date.replace(day=1)
    # 32 days after the 1st always falls inside the next month, whatever its length.
    inside_next_month = month_start + dt.timedelta(days=32)
    return inside_next_month.replace(day=1)

def last_second_of_month(date: str) -> str:
    """Return ``'YYYY-MM-DD 23:59:59'`` for the last day of the month containing ``date``.

    ``date`` is parsed with ``pd.Timestamp``; ``MonthEnd(0)`` leaves a date that is
    already a month end unchanged and otherwise rolls it forward to the month end.
    """
    month_end = pd.Timestamp(date) + pd.offsets.MonthEnd(0)
    return f"{month_end.date()} 23:59:59"

def first_second_of_month(date: str) -> str:
    """Return ``'YYYY-MM-DD 00:00:00'`` for the first day of the month containing ``date``.

    Args:
        date: anything ``pd.Timestamp`` can parse (e.g. ``'2021-02-10'``).

    Returns:
        The first day of that same month followed by ``' 00:00:00'``.

    The previous implementation added ``pd.offsets.MonthBegin(0)``, which rolls a
    mid-month date *forward* to the first day of the NEXT month; only dates that
    were already the 1st produced the expected result.
    """
    month_start = pd.Timestamp(date).replace(day=1)
    return str(month_start.date()) + " 00:00:00"

# In-memory buffer that receives every formatted log record written by the root logger.
streamer = StringIO()

def setup_logging():
    """Reset the root logger so that it writes only to ``streamer`` at INFO level.

    All handlers currently attached to the root logger are removed, then a single
    ``StreamHandler`` targeting ``streamer`` is installed with the format
    ``"<YYYY-MM-DD HH:MM:SS> <LEVEL>: <message>"``.

    Returns:
        logging.Logger: the configured root logger.
    """
    logger = logging.getLogger()
    # Iterate over a copy: removeHandler() mutates logger.handlers, and looping over
    # the live list skips every other handler, leaving stale handlers attached.
    for existing_handler in list(logger.handlers):
        logger.removeHandler(existing_handler)

    stream_handler = logging.StreamHandler(stream=streamer)
    stream_handler.setFormatter(
        logging.Formatter("%(asctime)s %(levelname)s: %(message)s", "%Y-%m-%d %H:%M:%S")
    )
    logger.addHandler(stream_handler)
    logger.setLevel(logging.INFO)
    return logger

def query_log(query_id, table, logger):
    """Log the current Athena execution state of ``query_id``.

    Args:
        query_id: Athena query execution id (string; it is concatenated into the message).
        table: label for the query, used as the message prefix.
        logger: object exposing ``critical(msg)`` and ``info(msg)``.

    Returns:
        None. ``FAILED`` / ``CANCELLED`` states are logged at CRITICAL level, every
        other state at INFO level.
    """
    # Fetch the state once. The original queried Athena a second time for the
    # level check, so the logged state and the chosen level could disagree.
    status = wr.athena.get_query_execution(query_id)['Status']['State']
    message = table + ': query is in ' + status + ' State. ' + 'QueryID: ' + query_id
    if status in ('FAILED', 'CANCELLED'):
        logger.critical(message)
    else:
        logger.info(message)
    return None

import os  # stdlib; imported next to its only use in this cell

http = urllib3.PoolManager()
# SECURITY: the chat webhook URL used to be hard-coded here together with its `key`
# and `token` query parameters. It is now read from the GCHAT_WEBHOOK_URL environment
# variable; `url` is None when that variable is not set.
# NOTE(review): the previously committed key/token should be treated as leaked and rotated.
url = os.environ.get('GCHAT_WEBHOOK_URL')
fs = feature_store.feature_store()

# Timezone used for every timestamp computed below.
zone = dateutil.tz.gettz('Asia/Calcutta')

logger = setup_logging()

# Timezone-aware "now", truncated to the start of the current hour, plus its string form.
now = dt.datetime.now(zone)
current_hour = now.replace(minute=0, second=0, microsecond=0)
current_hour_s = current_hour.strftime("%Y-%m-%d %H:%M:%S")

def query_progress(query_id, run_async, table_name, poll_interval=2):
    """Poll Athena until a query leaves its waiting states, then log and return the state.

    Args:
        query_id: Athena query execution id.
        run_async: when falsy, wait while the query is ``QUEUED`` or ``RUNNING``
            (i.e. until it finishes); when truthy, wait only while it is ``QUEUED``
            (i.e. until it has started or already finished).
        table_name: label passed to ``query_log`` for the log message.
        poll_interval: seconds to sleep between two status requests (default 2).

    Returns:
        str: the last state reported by Athena.

    Fixes over the previous version:
      * ``status not in ('SUCCEEDED')`` / ``('RUNNING')`` compared against a plain
        string (substring test) rather than a tuple;
      * the synchronous branch polled Athena in a tight loop with no sleep;
      * a state outside the known set made the loop spin forever, because no
        branch refreshed ``status`` or broke out.
    """
    def _current_state():
        return wr.athena.get_query_execution(query_id)['Status']['State']

    if run_async:
        waiting_states = ('QUEUED',)
        quiet_exit_state = 'RUNNING'
    else:
        waiting_states = ('QUEUED', 'RUNNING')
        quiet_exit_state = 'SUCCEEDED'

    exit_messages = {
        'SUCCEEDED': 'Query Succeeded',
        'FAILED': 'Query Failed',
        'CANCELLED': 'Query Cancelled',
    }

    status = _current_state()
    while status in waiting_states:
        time.sleep(poll_interval)
        status = _current_state()

    # The expected exit state stays silent, exactly as before; everything else is reported.
    if status != quiet_exit_state:
        print(exit_messages.get(status, 'Query in unexpected state: ' + str(status)))

    query_log(query_id, table_name, logger)
    return status

In [4]:
# Athena SQL that assembles the training set. The final SELECT yields one row per
# `id` with: sp_id, truck_number, event_timestamp, available_flag (1 when
# availability_flag = 'AVAILABLE', else 0), district_id, st_features (per-day
# [district_similarity, total_dwell_time, total_is_ignition_off] triples) and
# agg_features (four aggregated GPS values).
#
# The 250 ('f_i', f_i) entries of the district embedding map are generated rather
# than typed out; the resulting text is identical to the hand-written version.
_FEATURE_ENTRIES_TOKEN = '__FEATURE_ENTRIES__'
_feature_entries = ',\n\t\t\t'.join(
    "('f_{0}', f_{0})".format(feature_no) for feature_no in range(1, 251)
)

_query_template = '''with sp_mapping_temp as (
SELECT  id as fleet_owner_id, max_by(cast(phone_no as varchar), updated_at) as mobile_no,
max_by(cast(iam_id as int), updated_at) as sp_id  FROM  "awsdatacatalog"."supply_team"."supply_team_blackbuck_fleetapp_fleetowner"
group by 1
),

sp_mapping as (
select fleet_owner_id, mobile_no, cast(sp_id as bigint) as sp_id 
from sp_mapping_temp 
where mobile_no in (
select mobile_no from (
select mobile_no, count(*) as sp_count from sp_mapping_temp
group by 1
having count(*)<=1))
),

--- Truck Mapping to SP ID
truck_mapping as (
select ft.id as truck_id,
ft.truck_no as truck_number,
tor.fleet_owner_id as fleet_owner_id,
s.sp_id
from "awsdatacatalog"."supply_team"."supply_team_blackbuck_fleetapp_truck" ft
inner join "awsdatacatalog"."supply_team"."supply_team_blackbuck_truck_owner_request" tor on ft.id = tor.truck_id
inner join sp_mapping s on s.fleet_owner_id = tor.fleet_owner_id
where 
ft.truck_no != ''
and tor.fleet_owner_id is not null
and tor.kyc_status_v2 ='APPROVED'
and ft.is_truck = 'VERIFIED'
and ft.is_verified != 3
group by 1,2,3,4
),

--- Single Truck FOs/SPs
single_truck_sps as (
select
tm.*,
tmc.number_of_trucks
from truck_mapping tm
left join (select sp_id, count(distinct truck_number) as number_of_trucks from truck_mapping group by 1) tmc on tmc.sp_id = tm.sp_id
where tmc.number_of_trucks = 1
),

------ Plaza to District Mapping
district_boundaries as (
select place_id as district_id, 
name as district_name, 
ST_GeomFromBinary(from_hex(to_utf8(replace(boundary_geog,'20E61000')))) as boundary_geog,
ST_X(ST_Centroid(ST_GeomFromBinary(from_hex(to_utf8(replace(boundary_geog,'20E61000')))))) as longitude,
ST_Y(ST_Centroid(ST_GeomFromBinary(from_hex(to_utf8(replace(boundary_geog,'20E61000')))))) as latitude
from location_service.public.admin_area where deleted = false
and local_tag = 'DISTRICT'
and boundary_geog != ''
),

distict_distance as (
select a.district_id as from_district_id,
b.district_id as to_district_id,
cast(great_circle_distance(a.latitude, a.longitude, b.latitude, b.longitude) as int) as distance
from district_boundaries a
cross join district_boundaries b
),

---- Semantics District Vectors
semantics_score_district_temp as (
	SELECT *
	FROM (
			SELECT t.*,
				ROW_NUMBER() OVER (
					PARTITION BY bb_place_id
					ORDER BY created_timestamp DESC
				) AS rnk
			FROM "awsdatacatalog"."feature_store"."semantics_from_district" t
		)
	WHERE rnk = 1
),

semantics_score_district_map as (
	select bb_place_id as district_id,
		MAP_FROM_ENTRIES(
			ARRAY [ __FEATURE_ENTRIES__ ]
		) as features,
		1 as key
	from semantics_score_district_temp
),

district_similarity as (
select a.district_id as from_district_id,
	b.district_id as to_district_id,
	cast(
		round(cosine_similarity(a.features, b.features), 2) * 100 as int
	) as similarity
from semantics_score_district_map a
	left join semantics_score_district_map b on a.key = b.key
group by 1, 2, 3
),

output as (
select * from awsdatacatalog.feature_store.avl_training_output_v2
where sp_id in (select sp_id from single_truck_sps)
and district_id in (select district_id from district_boundaries)
),
--- Merge Output with GPS Transactions data
gps_merge_temp as (
select o.*,
s.truck_number,
gps.entity as district_id_gps,
gps.event_timestamp as event_timestamp_gps,
gps.total_dwell_time,
gps.total_speed,
gps.total_is_ignition_off,
gps.total_records,
d.distance,

case
    when gps.entity = LAG(gps.entity, 1) OVER (PARTITION BY o.id ORDER BY gps.event_timestamp DESC) then 0

    else 1
end as flag,
date_diff('day',gps.event_timestamp,o.event_timestamp) as time_diff,
case
    when date_diff('day',gps.event_timestamp,o.event_timestamp)<=1 then 'day_1'
     when date_diff('day',gps.event_timestamp,o.event_timestamp)<=2 and date_diff('day',gps.event_timestamp,o.event_timestamp)>1 then 'day_2'
     when date_diff('day',gps.event_timestamp,o.event_timestamp)<=3 and date_diff('day',gps.event_timestamp,o.event_timestamp)>2 then 'day_3'
     when date_diff('day',gps.event_timestamp,o.event_timestamp)<=4 and date_diff('day',gps.event_timestamp,o.event_timestamp)>3 then 'day_4'
     when date_diff('day',gps.event_timestamp,o.event_timestamp)<=5 and date_diff('day',gps.event_timestamp,o.event_timestamp)>4 then 'day_5'
    else 'others'
end as day_flag

from output o
inner join single_truck_sps s on s.sp_id = o.sp_id
inner join gps_features_district gps on s.truck_number = gps.truck_number and o.event_timestamp>gps.event_timestamp and gps.event_timestamp>=o.event_timestamp - interval '5' day
inner join distict_distance d on d.from_district_id = o.district_id and d.to_district_id = gps.entity
),

gps_day_level_temp as (
select
id,
sp_id,
truck_number,
district_id,
state_id,
event_timestamp,
day_flag,
availability_flag,
max(total_dwell_time) as total_dwell_time,
max_by(total_is_ignition_off, total_dwell_time) as total_is_ignition_off,
max_by(district_id_gps, total_dwell_time) as district_id_gps
from gps_merge_temp
where day_flag != 'others'
group by 1,2,3,4,5,6,7,8
),

day_level_features as (
select gps.*,
COALESCE(d.similarity, -100) as district_similarity
from gps_day_level_temp gps
left join district_similarity d on gps.district_id = d.from_district_id and gps.district_id_gps = d.to_district_id
),


gps_trajectory as (
select
id,
sp_id,
truck_number,
event_timestamp,
district_id, state_id,
availability_flag,
array_agg(array[cast(district_similarity as int), cast(total_dwell_time as int), cast(total_is_ignition_off as int)] order by day_flag ASC) as st_features
from day_level_features
group by 1,2,3,4,5,6,7
order by sp_id, event_timestamp, district_id
),

gps_district_characteristics_temp as (
select
f.id,
f.truck_number,
max_by(gps_agg.total_dwell_time, gps_agg.event_timestamp) as total_dwell_time,
max_by(gps_agg.total_speed, gps_agg.event_timestamp) as total_speed_agg,
max_by(gps_agg.total_is_ignition_off, gps_agg.event_timestamp) as total_is_ignition_off_agg,
max_by(gps_agg.total_records, gps_agg.event_timestamp) as total_records_agg
from gps_merge_temp f
inner join gps_features_district_aggregate gps_agg on f.truck_number = gps_agg.truck_number and f.district_id = gps_agg.entity
and f.event_timestamp>gps_agg.event_timestamp
group by 1,2
),

gps_district_characteristics as (
select
id,
ARRAY[cast(COALESCE(total_dwell_time,0) as int),
         cast(COALESCE(total_speed_agg,0) as int),
         cast(COALESCE(total_is_ignition_off_agg,0) as int),
         cast(COALESCE(total_records_agg,0) as int)] as agg_features
from gps_district_characteristics_temp
)
select
o.id,o.sp_id,s.truck_number,o.event_timestamp,case when o.availability_flag = 'AVAILABLE' then 1 else 0 end as available_flag,  o.district_id,
gps_tj.st_features,
gps_agg.agg_features
from output o
inner join single_truck_sps s on s.sp_id = o.sp_id
inner join gps_trajectory gps_tj on gps_tj.id=o.id
inner join gps_district_characteristics gps_agg on gps_agg.id = o.id
group by 1,2,3,4,5,6,7,8
'''

# Splice the generated entries into the ARRAY [ ... ] literal of the template.
query = _query_template.replace(_FEATURE_ENTRIES_TOKEN, _feature_entries)

In [5]:
# Run `query` on Athena and load the result into a pandas DataFrame.
# Database, workgroup and S3 output location all come from the feature_store config.
athena_read_options = {
    'database': config.feature_db,
    'workgroup': config.work_group,
    's3_output': config.s3_athena_output,
    'ctas_approach': True,
}
df = wr.athena.read_sql_query(query, **athena_read_options)

In [6]:
df.shape

(80512, 8)

In [7]:
df['id'].nunique()

80512

In [8]:
df.columns

Index(['id', 'sp_id', 'truck_number', 'event_timestamp', 'available_flag',
       'district_id', 'st_features', 'agg_features'],
      dtype='object')

In [9]:
df['truck_number'].nunique()

5165

In [10]:
df['district_id'].nunique()

568

In [11]:
# Work on an independent (deep) copy so the raw query result in `df` stays untouched.
training_data = df.copy(deep=True)

In [12]:
training_data.shape

(80512, 8)

In [13]:
training_data.head(2)

Unnamed: 0,id,sp_id,truck_number,event_timestamp,available_flag,district_id,st_features,agg_features
0,459980,1338338,BR21GA9140,2021-09-14 16:00:00,1,aa681238547580178432,"[[38, 2452, 969], [30, 1739, 9652], [30, 1359,...","[3804, 32864, 21091, 22898]"
1,3839426,947574,BR06GA6144,2021-06-03 09:00:00,0,aa681238548557451264,"[[-8, 3568, 0], [-8, 2012, 0]]","[155, 29569, 7, 713]"


In [14]:
# training_data['available_flag'] = training_data['availability_flag'].apply(lambda x: 1 if x == 'AVAILABLE' else 0)

In [15]:
training_data.head(1)

Unnamed: 0,id,sp_id,truck_number,event_timestamp,available_flag,district_id,st_features,agg_features
0,459980,1338338,BR21GA9140,2021-09-14 16:00:00,1,aa681238547580178432,"[[38, 2452, 969], [30, 1739, 9652], [30, 1359,...","[3804, 32864, 21091, 22898]"


In [16]:
# training_data = training_data[training_data['agg_features'].notna()]

In [17]:
df.columns

Index(['id', 'sp_id', 'truck_number', 'event_timestamp', 'available_flag',
       'district_id', 'st_features', 'agg_features'],
      dtype='object')

In [18]:
# df[['st_features','available_flag']].to_csv('avail_manifest_v0.csv')

In [19]:
import pandas as pd
import numpy as np
import keras
from keras.layers import LSTM, Dropout, Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Embedding,Reshape


Using TensorFlow backend.


In [20]:
training_data.columns

Index(['id', 'sp_id', 'truck_number', 'event_timestamp', 'available_flag',
       'district_id', 'st_features', 'agg_features'],
      dtype='object')

In [21]:
training_data['st_features'][0]

array([array([  38, 2452,  969], dtype=int32),
       array([  30, 1739, 9652], dtype=int32),
       array([  30, 1359, 7939], dtype=int32),
       array([   30,  2667, 15067], dtype=int32),
       array([   30,  2247, 12820], dtype=int32)], dtype=object)

In [22]:
training_data.shape

(80512, 8)

In [23]:
import tensorflow as tf

# Labels: available_flag is 0/1, so int8 is wide enough.
output = np.array(training_data.available_flag.to_list())
output = tf.convert_to_tensor(output, np.int8)

### LSTM Features
# Each row holds a variable-length array of per-day int arrays; turn it into
# nested Python lists so pad_sequences can pad every trajectory to 5 steps.
st_features = [
    [day_vector.tolist() for day_vector in trajectory.tolist()]
    for trajectory in training_data.st_features.to_list()
]

st_features = tf.keras.preprocessing.sequence.pad_sequences(
    st_features, maxlen=5, padding="post"
)

# int32, not int8: the features contain values such as 2452 or 15067, far outside
# the int8 range (-128..127). Converting to int8 silently wrapped them around and
# fed corrupted inputs to the model.
# NOTE(review): features are still unscaled; consider normalising before training.
st_features = tf.convert_to_tensor(st_features, np.int32)


## agg features
agg_features = [row.tolist() for row in training_data.agg_features.to_list()]

# Same overflow issue as above (e.g. 32864 does not fit in int8).
agg_features = tf.convert_to_tensor(agg_features, np.int32)

In [24]:
# gps_characteristics = []

# for i in range(len())

In [25]:
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam

def Find_Optimal_Cutoff(target, predicted):
    """Return the ROC threshold at which ``tpr - (1 - fpr)`` is closest to zero.

    Args:
        target: true labels, forwarded unchanged to ``roc_curve``.
        predicted: predicted scores, forwarded unchanged to ``roc_curve``.

    Returns:
        list: a one-element list holding the selected threshold.

    NOTE(review): ``roc_curve`` is not imported anywhere in the visible notebook
    (presumably ``sklearn.metrics.roc_curve`` -- confirm and import it before use).
    """
    fpr, tpr, threshold = roc_curve(target, predicted)
    positions = np.arange(len(tpr))
    roc = pd.DataFrame({
        'tf': pd.Series(tpr - (1 - fpr), index=positions),
        'threshold': pd.Series(threshold, index=positions),
    })
    # Rank the rows by |tpr - (1 - fpr)| and keep only the closest one.
    closest_first = roc['tf'].abs().argsort()
    roc_t = roc.iloc[closest_first[:1]]

    return list(roc_t['threshold'])

class prediction_history(Callback):
    """Keras callback that records the model's predictions after every epoch.

    Args:
        x_test: optional inputs to predict on; passed to ``predict`` as given.
            When omitted, the callback falls back to a notebook-level ``x_test``
            variable wrapped in a list, which is what the original code did.

    Attributes:
        predhis: list with one ``predict`` result per finished epoch.
    """

    def __init__(self, x_test=None):
        # Let the Keras base class initialise its own attributes first.
        super().__init__()
        self.predhis = []
        self.x_test = x_test

    def on_epoch_end(self, epoch, logs=None):
        # `logs` previously defaulted to a shared mutable dict; None avoids that.
        # Prefer the model Keras attached to this callback; fall back to the
        # notebook-level `model` when none has been attached.
        attached_model = getattr(self, 'model', None)
        predictor = attached_model if attached_model is not None else model
        if self.x_test is not None:
            eval_inputs = self.x_test
        else:
            # NOTE(review): no `x_test` is defined anywhere in the visible notebook.
            eval_inputs = [x_test]
        self.predhis.append(predictor.predict(eval_inputs))

# Path of the checkpoint file that ModelCheckpoint writes during training.
bst_model_path = 'truck_availability_gps_only_model_tf.h5'

# Per-epoch prediction recorder (not included in the callbacks passed to model.fit
# in this notebook).
predictions = prediction_history()

# Stop training once val_loss has failed to improve for 4 consecutive epochs.
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=4,
)

# Keep only the best full model (architecture + weights), not weights alone.
model_checkpoint = ModelCheckpoint(
    bst_model_path,
    save_weights_only=False,
    save_best_only=True,
)

In [26]:
# Input sizes for the network: axis 1 and axis 2 of st_features size the sequence
# input, axis 1 of agg_features sizes the aggregate input.
n_timesteps, n_features = st_features.shape[1:3]
(n_agg_features,) = agg_features.shape[1:2]
# n_agg_features = 

In [27]:
# total_speed_agg.shape

In [28]:
# Sequence branch: the (n_timesteps, n_features) trajectory goes through an LSTM
# followed by dropout.
ts_input = Input(shape=(n_timesteps, n_features))
main_input_lstm = LSTM(200, activation='relu')(ts_input)
st_input = Dropout(0.5)(main_input_lstm)

# Aggregate branch: the n_agg_features vector is concatenated to the LSTM output as-is.
agg_input = Input(shape=(n_agg_features,))
merged = tf.keras.layers.Concatenate(axis=1)([st_input, agg_input])

# Dense head ending in a single sigmoid unit (binary availability prediction).
x = Dense(256, activation='relu')(merged)
x = Dense(128, activation='relu')(x)
x = Dense(64, activation='relu')(x)
main_output = Dense(1, activation='sigmoid')(x)

model = Model(inputs=[ts_input, agg_input], outputs=[main_output])

# `learning_rate` replaces the deprecated `lr` alias of the Adam optimizer.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 5, 3)]       0                                            
__________________________________________________________________________________________________
lstm (LSTM)                     (None, 200)          163200      input_1[0][0]                    
__________________________________________________________________________________________________
dropout (Dropout)               (None, 200)          0           lstm[0][0]                       
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 4)]          0                                            
______________________________________________________________________________________________

In [29]:
training_data.shape

(80512, 8)

In [30]:
training_data['id'].nunique()

80512

In [31]:
# model.fit(st_features, output, 
#           epochs=30,  batch_size=1024, 
#           verbose = True, validation_split=0.2,
#           callbacks=[model_checkpoint, early_stopping])

In [32]:
# a

In [33]:
# Train on both inputs, with 25% of the samples set aside for validation.
# The checkpoint callback saves the best model; early stopping ends the run once
# val_loss stops improving.
model.fit(
    x=[st_features, agg_features],
    y=output,
    batch_size=1024,
    epochs=50,
    verbose=True,
    callbacks=[model_checkpoint, early_stopping],
    validation_split=0.25,
)

[2022-01-13 08:07:29.758 ip-172-16-50-196:25263 INFO utils.py:27] RULE_JOB_STOP_SIGNAL_FILENAME: None
[2022-01-13 08:07:29.783 ip-172-16-50-196:25263 INFO profiler_config_parser.py:111] Unable to find config at /opt/ml/input/config/profilerconfig.json. Profiler is disabled.
Train on 60384 samples, validate on 20128 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50


<tensorflow.python.keras.callbacks.History at 0x7ff50b6bae80>

# Train without 

In [None]:
# len(features_list),len(features_list[0]),len(features_list[0][0])

In [53]:
# features_list.shape

In [54]:
# features_list = np.array(sequences_features)

In [35]:
# 20337*25

In [55]:
# features_list = np.reshape(features_list,( features_list.shape[0],features_list.shape[1],4))

In [25]:
# features_list=np.array(features_list).astype(np.int32)

In [56]:
# import tensorflow as tf

# tf.convert_to_tensor(features_list, dtype=tf.int32) 

In [57]:
# (features_list[0])

In [58]:
# model.fit([features_list[:int(len(features_list)*.8)]], output[:int(len(features_list)*.8)], epochs=10,batch_size=10,
#           validation_data = ([features_list[int(len(features_list)*.8):]], output[int(len(features_list)*.8):]),
#           verbose=True)
