In [2]:
import json
import gzip
import boto3
import os
import pandas as pd
from flatten_json import flatten
from sklearn import preprocessing
import psycopg2

In [3]:
QUEUE_NAME = "login-queue"

In [4]:
def send_messages():
    sqs = boto3.client("sqs")
    queue_url = sqs.create_queue(QueueName=QUEUE_NAME)["QueueUrl"]
    print(f"queue_url: [{queue_url}]")

    with gzip.open("data/sample_data.json.gz", "r") as f:
        data = json.load(f)
        print(data)

    assert len(data) == 100

    for record in data:
        sqs.send_message(QueueUrl=queue_url, MessageBody=json.dumps(record))

    return

In [5]:
with open('data/sample_data.json.gz','rb') as f:
    gzip_fd = gzip.GzipFile(fileobj=f)
    d = json.loads(gzip_fd.read())

df = pd.json_normalize(d)
df

Unnamed: 0,user_id,app_version,device_type,ip,locale,device_id,foo,bar
0,424cdd21-063a-43a7-b91b-7ca1a833afae,2.3.0,android,199.172.111.135,RU,593-47-5928,,
1,c0173198-76a8-4e67-bfc2-74eaa3bbff57,0.2.6,ios,241.6.88.151,PH,104-25-0070,,
2,66e0635b-ce36-4ec7-aa9e-8a8fca9b83d4,2.2.1,ios,130.111.167.54,,127-42-0862,,
3,181452ad-20c3-4e93-86ad-1934c9248903,0.96,android,118.79.6.245,ID,190-44-3099,,
4,60b9441c-e39d-406f-bba0-c7ff0e0ee07f,0.4.6,android,223.31.97.46,FR,149-99-5185,,
...,...,...,...,...,...,...,...,...
95,2ad5985b-dc70-44ca-b3ed-f4246a42f611,1.35,android,229.70.21.221,PT,284-81-7238,,
96,ca322b6c-97f8-4075-9663-42cebeb1d26e,0.38,android,235.162.61.43,BG,812-95-5877,,
97,d4f4f380-349e-48ac-bdc3-5ee4e29b57c1,3.2.0,ios,200.100.97.178,FR,317-03-6529,,
98,792e3e1f-bf84-409e-925c-d653fab4b6be,1.4,ios,164.197.201.156,TH,116-92-9733,,


Import label encoder

In [6]:
# label_encoder object knows how to understand word labels.
label_encoder = preprocessing.LabelEncoder()

In [7]:
# Encode labels in column 'device_id' and 'ip'
df['masked_device_id']= label_encoder.fit_transform(df['device_id'])
df['masked_ip']= label_encoder.fit_transform(df['ip'])

In [9]:
df = df.drop(columns=['foo','bar'])
df['create_date'] = pd.Timestamp.today().strftime('%Y-%m-%d')
df = df[['user_id', 'device_type', 'masked_ip','masked_device_id','locale','app_version','create_date']]
df

Unnamed: 0,user_id,device_type,masked_ip,masked_device_id,locale,app_version,create_date
0,424cdd21-063a-43a7-b91b-7ca1a833afae,android,39,64,RU,2.3.0,2022-12-29
1,c0173198-76a8-4e67-bfc2-74eaa3bbff57,ios,66,1,PH,0.2.6,2022-12-29
2,66e0635b-ce36-4ec7-aa9e-8a8fca9b83d4,ios,11,6,,2.2.1,2022-12-29
3,181452ad-20c3-4e93-86ad-1934c9248903,android,7,13,ID,0.96,2022-12-29
4,60b9441c-e39d-406f-bba0-c7ff0e0ee07f,android,54,9,FR,0.4.6,2022-12-29
...,...,...,...,...,...,...,...
95,2ad5985b-dc70-44ca-b3ed-f4246a42f611,android,57,24,PT,1.35,2022-12-29
96,ca322b6c-97f8-4075-9663-42cebeb1d26e,android,61,94,BG,0.38,2022-12-29
97,d4f4f380-349e-48ac-bdc3-5ee4e29b57c1,ios,40,32,FR,3.2.0,2022-12-29
98,792e3e1f-bf84-409e-925c-d653fab4b6be,ios,23,4,TH,1.4,2022-12-29


In [10]:
def get_secret(secret_name='wysde'):
    region_name = "us-east-1"
    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name)
    get_secret_value_response = client.get_secret_value(SecretId=secret_name)
    get_secret_value_response = json.loads(get_secret_value_response['SecretString'])
    return get_secret_value_response

In [11]:
secret_vals = get_secret()

postgres_endpoint = secret_vals['RDS_POSTGRES_HOST']
postgres_user = secret_vals['RDS_POSTGRES_USERNAME']
postgres_pass = secret_vals['RDS_POSTGRES_PASSWORD']
port = 5432
dbname = "sparsh"

In [23]:
# establishing the connection
conn = psycopg2.connect(database=dbname, 
                        user=postgres_user, 
                        password=postgres_pass, 
                        host=postgres_endpoint, 
                        port=port)

In [24]:
# Creating a cursor object using the cursor() method
cursor = conn.cursor()

In [25]:
cursor.execute('CREATE TABLE IF NOT EXISTS user_login(user_id  varchar(128), device_type varchar(32),masked_ip varchar(256),masked_device_id varchar(256),locale varchar(32),app_version varchar(32),create_date date)')
print('Creation of table succeeded')

Creation of table succeeded


In [26]:
for i in df.index:
    cols  = ','.join(list(df.columns))
    vals  = [df.at[i,col] for col in list(df.columns)]
    query = "INSERT INTO user_login VALUES('%s','%s','%s','%s','%s','%s','%s')" % (vals[0], vals[1], vals[2],vals[3],vals[4],vals[5],vals[6])
    cursor.execute(query)

In [27]:
# fetching all rows
sql1='''select * from user_login'''
cursor.execute(sql1)
for i in cursor.fetchall():
    print(i)

('424cdd21-063a-43a7-b91b-7ca1a833afae', 'android', '39', '64', 'RU', '2.3.0', datetime.date(2022, 12, 29))
('c0173198-76a8-4e67-bfc2-74eaa3bbff57', 'ios', '66', '1', 'PH', '0.2.6', datetime.date(2022, 12, 29))
('66e0635b-ce36-4ec7-aa9e-8a8fca9b83d4', 'ios', '11', '6', 'None', '2.2.1', datetime.date(2022, 12, 29))
('181452ad-20c3-4e93-86ad-1934c9248903', 'android', '7', '13', 'ID', '0.96', datetime.date(2022, 12, 29))
('60b9441c-e39d-406f-bba0-c7ff0e0ee07f', 'android', '54', '9', 'FR', '0.4.6', datetime.date(2022, 12, 29))
('5082b1ae-6523-4e3b-a1d8-9750b4407ee8', 'android', '62', '34', 'None', '3.7', datetime.date(2022, 12, 29))
('5bc74293-3ca1-4f34-bb89-523887d0cc2f', 'ios', '65', '87', 'PT', '2.2.8', datetime.date(2022, 12, 29))
('92d8ceec-2e12-49f3-81bd-518fe66971ec', 'android', '38', '90', 'BR', '0.5.5', datetime.date(2022, 12, 29))
('05e153b1-4fa1-474c-bd7e-9f74d1c495e7', 'android', '22', '43', 'None', '0.5.0', datetime.date(2022, 12, 29))
('325c0f3d-da25-45ff-aff4-81816db069bc', 