### Installing required libraries

In [1]:
!pip install psycopg2

Collecting psycopg2
  Downloading psycopg2-2.9.5-cp37-cp37m-win_amd64.whl (1.2 MB)
     ---------------------------------------- 1.2/1.2 MB 2.4 MB/s eta 0:00:00
Installing collected packages: psycopg2
Successfully installed psycopg2-2.9.5



[notice] A new release of pip available: 22.3.1 -> 23.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import psycopg2
import pandas as pd

### Defining functions

In [6]:
def create_database(dbName):
    # connect to default db
    conn = psycopg2.connect("host=localhost dbname=postgres user=postgres password=password")
    conn.set_session(autocommit=True)
    cur = conn.cursor()
    
    # create desired db
    cur.execute("drop database if exists {}".format(dbName))
    cur.execute("create database {}".format(dbName))
    
    conn.close()
    
    # reconnect to newly created db
    conn = psycopg2.connect("host=localhost dbname={} user=postgres password=password".format(dbName))
    conn.set_session(autocommit=True)
    cur = conn.cursor()
    
    return cur, conn

In [8]:
def create_table(ddl):
    try:
        cur.execute(ddl)
    except psycopg2.Error as e:
        print("Error: failure to create table")
        print(e)
    else:
        print("Table created")

 ### Creating tables


In [9]:
insta_influencers_ddl = ("""CREATE TABLE public.Insta_Influencer (username varchar(100) not null, name varchar(300) not null, followers varchar(50) not null, audience_country varchar(50) not null,
    authentic_engagement varchar(50) not null, engagement_avg varchar(50) not null, category_1 varchar(100) null, category_2 varchar(100) null);""")

In [10]:
insta_influencers_june_ddl = ("CREATE TABLE public.insta_influencer_June ( username varchar(100) NOT NULL, name varchar(300) NOT NULL, \
	subscribers_count varchar(50) NOT NULL, \
	audience_country varchar(50) NOT NULL, \
	likes_avg varchar(50) NOT NULL, \
	comments_avg varchar(50) NOT NULL, \
	category_1 varchar(100) NULL, \
	category_2 varchar(100) NULL \
);")

In [11]:
insta_influencers_sep_ddl = ("CREATE TABLE public.insta_influencer_Sep ( \
	sr_no int not null, \
	username varchar(100) NOT NULL, \
	name varchar(300) NOT NULL, \
	subscribers varchar(50) NOT NULL, \
	audience_country varchar(50) NOT NULL, \
	likes_avg varchar(50) NOT NULL, \
	authentic_engagement varchar(50) not null, \
	engagement_avg varchar(50) NOT NULL, \
	category_1 varchar(100) NULL, \
	category_2 varchar(100) NULL \
);")

In [None]:
cur, conn = create_database("smi")

In [12]:
create_table(insta_influencers_ddl)
create_table(insta_influencers_june_ddl)
create_table(insta_influencers_sep_ddl)

Table created
Table created
Table created


### Loading files and inserting into tables

In [14]:
insta_influencers_df = pd.read_csv(r'C:\Umair_Workspace\Development\Projects\P1_SMI\Social-Media-Influencers\Dataset\Social Media Influencers in 2022\social media influencers - instagram.csv')
print(insta_influencers_df)

    Influencer insta name instagram name                 category_1  \
0                     433            433         Sports with a ball   
1            __youngbae__        TAEYANG                      Music   
2             _agentgirl_  НАСТЯ ИВЛЕЕВА                      Shows   
3             _imyour_joy            Joy                  Lifestyle   
4           _jeongjaehyun        Jaehyun                        NaN   
..                    ...            ...                        ...   
995               zendaya        Zendaya  Cinema & Actors/actresses   
996                zidane         zidane         Sports with a ball   
997                zkdlin            KAI                      Music   
998    zoeisabellakravitz   Zoë Kravitz  Cinema & Actors/actresses   
999               zoesugg      Zoë Sugg                  Lifestyle   

             category_2 Followers Audience country(mostly)  \
0                   NaN     48.5M                    Spain   
1                   NaN

In [21]:
insta_influencers_insertstmt = ("INSERT INTO public.insta_influencer \
        (username, name, category_1, category_2, followers, audience_country, authentic_engagement, engagement_avg) \
        VALUES(%s, %s, %s, %s, %s, %s, %s, %s)")

In [22]:
try:
    for i, row in insta_influencers_df.iterrows():
        cur.execute(insta_influencers_insertstmt, list(row))
except psycopg2.Error as e:
    print("Error: unable to insert data")
    print(e)

In [23]:
try:
    cur.execute("select * from public.insta_influencer limit 10")
except psycopg2.Error as e:
    print("Error: error select")
    print(e)

row = cur.fetchone()
while row:
    print(row)
    row = cur.fetchone()

('433', '433', '48.5M', 'Spain', '383.1K', '637K', 'Sports with a ball', 'NaN')
('__youngbae__', 'TAEYANG', '12.7M', 'Indonesia', '478K', '542.3K', 'Music', 'NaN')
('_agentgirl_', 'НАСТЯ ИВЛЕЕВА', '18.8M', 'Russia', '310.8K', '377.9K', 'Shows', 'NaN')
('_imyour_joy', 'Joy', '13.5M', 'Indonesia', '1.1M', '1.4M', 'Lifestyle', 'NaN')
('_jeongjaehyun', 'Jaehyun', '11.1M', 'Indonesia', '2.5M', '3.1M', 'NaN', 'NaN')
('_mariahwasa', 'HWASA', '7.9M', 'Brazil', '915.1K', '1.2M', 'NaN', 'NaN')
('_rl9', 'Robert Lewandowski', '25M', 'Poland', '588.6K', '749K', 'Sports with a ball', 'NaN')
('_seorina', '설인아 sᴇᴏʀɪɴᴀ', '3M', 'South Korea', '803.9K', '997.8K', 'Lifestyle', 'Cinema & Actors/actresses')
('_shotaroo_', 'SHOTARO ショウタロウ', '4.9M', 'Indonesia', '860.1K', '1.1M', 'NaN', 'NaN')
('03_hu', '박지후', '4.1M', 'South Korea', '711K', '872.6K', 'NaN', 'NaN')
