### Installing required libraries

In [None]:
%pip install psycopg2

In [None]:
import psycopg2
import pandas as pd

### Defining functions

In [None]:
def create_database(dbName):
    # connect to default db
    conn = psycopg2.connect("host=localhost dbname=postgres user=postgres password=password")
    conn.set_session(autocommit=True)
    cur = conn.cursor()
    
    # create desired db
    cur.execute("drop database if exists {}".format(dbName))
    cur.execute("create database {}".format(dbName))
    
    conn.close()
    
    # reconnect to newly created db
    conn = psycopg2.connect("host=localhost dbname={} user=postgres password=password".format(dbName))
    conn.set_session(autocommit=True)
    cur = conn.cursor()
    
    return cur, conn

In [None]:
def create_table(ddl):
    try:
        cur.execute(ddl)
    except psycopg2.Error as e:
        print("Error: failure to create table")
        print(e)
    else:
        print("Table created")

 ### Creating tables


In [None]:
insta_influencers_ddl = ("""CREATE TABLE public.Insta_Influencer (username varchar(100) not null, name varchar(300) not null, followers varchar(50) not null, audience_country varchar(50) not null,
    authentic_engagement varchar(50) not null, engagement_avg varchar(50) not null, category_1 varchar(100) null, category_2 varchar(100) null);""")

In [None]:
insta_influencers_june_ddl = ("CREATE TABLE public.insta_influencer_June ( username varchar(100) NOT NULL, name varchar(300) NOT NULL, \
	subscribers_count varchar(50) NOT NULL, \
	audience_country varchar(50) NOT NULL, \
	likes_avg varchar(50) NOT NULL, \
	comments_avg varchar(50) NOT NULL, \
	category_1 varchar(100) NULL, \
	category_2 varchar(100) NULL \
);")

In [None]:
insta_influencers_sep_ddl = ("CREATE TABLE public.insta_influencer_Sep ( \
	sr_no int not null, \
	username varchar(100) NOT NULL, \
	name varchar(300) NOT NULL, \
	subscribers varchar(50) NOT NULL, \
	audience_country varchar(50) NOT NULL, \
	authentic_engagement varchar(50) not null, \
	engagement_avg varchar(50) NOT NULL, \
	category_1 varchar(100) NULL, \
	category_2 varchar(100) NULL \
);")

In [None]:
cur, conn = create_database("smi")

In [None]:
create_table(insta_influencers_ddl)
create_table(insta_influencers_june_ddl)
create_table(insta_influencers_sep_ddl)

### Loading files and inserting into tables

##### SMI

In [None]:
insta_influencers_df = pd.read_csv(r'C:\Umair_Workspace\Development\Projects\P1_SMI\Social-Media-Influencers\Dataset\Social Media Influencers in 2022\social media influencers - instagram.csv')
print(insta_influencers_df)

In [None]:
insta_influencers_insertstmt = ("INSERT INTO public.insta_influencer \
        (username, name, category_1, category_2, followers, audience_country, authentic_engagement, engagement_avg) \
        VALUES(%s, %s, %s, %s, %s, %s, %s, %s)")

In [None]:
try:
    for i, row in insta_influencers_df.iterrows():
        cur.execute(insta_influencers_insertstmt, list(row))
except psycopg2.Error as e:
    print("Error: unable to insert data")
    print(e)

In [None]:
try:
    cur.execute("select * from public.insta_influencer limit 10")
except psycopg2.Error as e:
    print("Error: error select")
    print(e)

row = cur.fetchone()
while row:
    print(row)
    row = cur.fetchone()

##### SMI_September

In [None]:
insta_influencers_Sep_df = pd.read_csv(r'C:\Umair_Workspace\Development\Projects\P1_SMI\Social-Media-Influencers\Dataset\Social Media Influencers in 2022\social media influencers - instagram sep-2022.csv')
insta_influencers_Sep_df

In [None]:
insta_influencers_sep_insertstmt = ("INSERT INTO public.insta_influencer_sep \
(sr_no, username, name, subscribers, audience_country, authentic_engagement, engagement_avg, category_1, category_2) \
VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s);")

In [None]:
conn = psycopg2.connect("host=localhost dbname={} user=postgres password=password".format("smi"))
conn.set_session(autocommit=True)
cur = conn.cursor()

In [None]:
try:
    for i, row in insta_influencers_Sep_df.iterrows():
        cur.execute(insta_influencers_sep_insertstmt, list(row))
    print("Data inserted")
except psycopg2.Error as e:
    print("Error: unable to insert data")
    print(e)

In [None]:
try:
    cur.execute("select * from public.insta_influencer_sep limit 10")
except psycopg2.Error as e:
    print("Error: error select")
    print(e)

row = cur.fetchone()
while row:
    print(row)
    row = cur.fetchone()

##### SMI_June

In [34]:
insta_influencers_June_df = pd.read_csv(r'C:\Umair_Workspace\Development\Projects\P1_SMI\Social-Media-Influencers\Dataset\Social Media Influencers in 2022\social media influencers-instagram june 2022 - june 2022.csv')
insta_influencers_June_df

Unnamed: 0,instagram name,influencer name,Category_1,Category_2,Subscribers count,Views avg.,Likes avg,Comments avg.
0,433,433,Sports with a ball,,50.7M,Spain,274.9K,456.7K
1,____kimwoobin,김우빈,Lifestyle,,3.6M,Indonesia,449.4K,592.7K
2,__youngbae__,TAEYANG,Music,,13.4M,Indonesia,269.6K,304.1K
3,_agentgirl_,НАСТЯ ИВЛЕЕВА,Shows,,18.6M,Russia,164.4K,189.4K
4,_hakkencoser_,Hakken 八犬,Lifestyle,Photography,3.5M,United States,525.1K,643K
...,...,...,...,...,...,...,...,...
1017,zidane,zidane,Sports with a ball,,33M,Spain,412.4K,560.8K
1018,zidane,zidane,Sports with a ball,,33M,Spain,412.4K,560.8K
1019,zkdlin,KAI,Music,,14.1M,Indonesia,1.4M,1.7M
1020,zo,Lonzo Ball,Sports with a ball,,14M,United States,145.8K,262K


In [28]:
insta_influencers_june_insertstmt = ("INSERT INTO public.insta_influencer_june \
(username, name, subscribers_count, audience_country, likes_avg, comments_avg, category_1, category_2) \
VALUES(%s, %s, %s, %s, %s, %s, %s, %s);")

In [31]:
for i, row in insta_influencers_June_df.iterrows():
    cur.execute(insta_influencers_june_insertstmt, list(row))

In [38]:
cur.execute("select * from public.insta_influencer_june limit 15")
row = cur.fetchall()
for r in row:
    print(r)
# row = cur.fetchone()
# while row:
#     print(row)
#     row = cur.fetchone()

('433', '433', 'Sports with a ball', 'NaN', '50.7M', 'Spain', '274.9K', '456.7K')
('____kimwoobin', '김우빈', 'Lifestyle', 'NaN', '3.6M', 'Indonesia', '449.4K', '592.7K')
('__youngbae__', 'TAEYANG', 'Music', 'NaN', '13.4M', 'Indonesia', '269.6K', '304.1K')
('_agentgirl_', 'НАСТЯ ИВЛЕЕВА', 'Shows', 'NaN', '18.6M', 'Russia', '164.4K', '189.4K')
('_hakkencoser_', 'Hakken 八犬', 'Lifestyle', 'Photography', '3.5M', 'United States', '525.1K', '643K')
('_hakkencoser_', 'Hakken 八犬', 'Lifestyle', 'Photography', '3.5M', 'United States', '525.1K', '643K')
('_imyour_joy', 'Joy', 'Lifestyle', 'NaN', '14M', 'Indonesia', '1.1M', '1.4M')
('_jeongjaehyun', 'Jaehyun', 'NaN', 'NaN', '11.9M', 'Indonesia', '2.1M', '2.6M')
('_mariahwasa', 'HWASA', 'NaN', 'NaN', '8.2M', 'Brazil', '957.2K', '1.2M')
('_rl9', 'Robert Lewandowski', 'Sports with a ball', 'NaN', '26.6M', 'Poland', '381.9K', '486K')
('_seorina', '설인아 sᴇᴏʀɪɴᴀ', 'Lifestyle', 'Cinema & Actors/actresses', '5.7M', 'South Korea', '728K', '903.5K')
(