# 4주차 과제

## SQL 연결 설정

In [36]:
import configparser
import requests
import psycopg2
from pandas import DataFrame

# Load the Redshift credentials from an INI file so they are not hard-coded
# in the notebook. The path is relative to the current working directory.
config = configparser.ConfigParser()
config.read('../../config.ini')

# Both values are read from the [REDSHIFT] section; a missing section or key
# raises KeyError here.
user = config['REDSHIFT']['USER']
password = config['REDSHIFT']['PASSWORD']

def get_Redshift_connection(
    user,
    password,
    host="grepp-data.cduaw970ssvt.ap-northeast-2.redshift.amazonaws.com",
    port=5439,
    dbname="dev",
):
    """Open an autocommit connection to Redshift and return a cursor on it.

    Args:
        user: Redshift user name.
        password: Password for *user*.
        host: Cluster endpoint; defaults to the course cluster.
        port: TCP port of the cluster.
        dbname: Database to connect to.

    Returns:
        A psycopg2 cursor. The underlying connection is reachable through
        the cursor's ``connection`` attribute and should be closed by the
        caller when it is no longer needed.
    """
    # Pass the settings as keyword arguments instead of formatting a DSN
    # string by hand: a hand-built "key=value" string breaks as soon as a
    # value (typically the password) contains a space, quote or backslash.
    conn = psycopg2.connect(
        dbname=dbname,
        user=user,
        password=password,
        host=host,
        port=port,
    )
    # Autocommit: every statement is committed immediately unless the caller
    # opens an explicit transaction with BEGIN.
    conn.set_session(autocommit=True)
    return conn.cursor()

## 과제 1 - colab ETL 버그 개선
* [ETL 실습 CoLab 링크](https://colab.research.google.com/drive/1Dis48HuS633KKeFWQOXk17Jjye0u5OR3?usp=sharing#scrollTo=gjDwY95epWDi)
* 버그: CSV의 header 행(`name,gender`)까지 데이터로 읽어 테이블에 저장한다. 수정: 적재 전에 header 행을 제외한다.

### ETL 함수 선언

In [28]:
def extract(url, timeout=30):
    """Download *url* and return the response body as text.

    Args:
        url: Location of the CSV file to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The decoded response body.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection problems or timeout.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status; otherwise the body of an error page
    # would flow into the load step as if it were CSV data.
    response.raise_for_status()
    return response.text

def transform(text):
    """Split the raw CSV text into a list of lines.

    The text is cut at every newline character; a trailing newline therefore
    yields a final empty string, and the header line (if any) is kept.
    """
    return text.split("\n")

def _parse_name_gender_rows(lines):
    """Turn CSV lines into ``(name, gender)`` tuples, skipping blanks and the header."""
    rows = []
    header_checked = False
    for line_no, raw in enumerate(lines, start=1):
        # strip() also removes the '\r' left behind when the file uses CRLF.
        record = raw.strip()
        if not record:
            continue

        fields = record.split(",")
        if len(fields) != 2:
            raise ValueError(
                "line {}: expected 'name,gender', got {!r}".format(line_no, raw)
            )
        name, gender = fields

        # Only the first non-blank line can be the header.
        if not header_checked:
            header_checked = True
            if name.lower() == 'name' and gender.lower() == 'gender':
                continue

        print(name, "-", gender)
        rows.append((name, gender))
    return rows


def load(lines):
    """Replace the contents of ``leemingyu05.name_gender`` with the given rows.

    Blank lines and a leading ``name,gender`` header line are skipped. All
    input is validated before the database is touched, so bad input never
    leaves the table empty. The table is rewritten inside one transaction.

    Args:
        lines: CSV lines as produced by ``transform``. The list is not
            modified.

    Raises:
        RuntimeError: if *lines* is empty or contains no data rows.
        ValueError: if a non-blank line does not consist of exactly two
            comma-separated fields.
    """
    if not lines:
        raise RuntimeError('lines is empty')

    rows = _parse_name_gender_rows(lines)
    if not rows:
        raise RuntimeError('lines contains no data rows')

    # Let the driver quote the values: names such as O'Neil would otherwise
    # break the statement (or inject SQL).
    placeholders = ', '.join(['(%s, %s)'] * len(rows))
    insert_sql = (
        'INSERT INTO leemingyu05.name_gender VALUES ' + placeholders + ';'
    )
    params = [value for row in rows for value in row]

    cur = get_Redshift_connection(user, password)
    try:
        cur.execute('BEGIN;')
        # DELETE instead of TRUNCATE: on Redshift TRUNCATE commits the
        # current transaction, so it could not be rolled back if the
        # INSERT failed.
        cur.execute('DELETE FROM leemingyu05.name_gender;')
        cur.execute(insert_sql, params)
        cur.execute('END;')
    except Exception:
        # Undo the DELETE so a failed load leaves the old data in place.
        cur.execute('ROLLBACK;')
        raise
    finally:
        # This function opened the connection, so it is responsible for
        # closing it.
        cur.connection.close()



### ETL 작업 실행

In [39]:
# Source CSV for the ETL run.
link = "https://s3-geospatial.s3-us-west-2.amazonaws.com/name_gender.csv"

# Extract: download the file body as text.
data = extract(link)

# Transform: split the text into individual lines.
lines = transform(data)

# Load: write the lines into the Redshift table (prints each row as it goes).
load(lines)

Adaleigh - F
Amryn - Unisex
Apurva - Unisex
Aryion - M
Alixia - F
Alyssarose - F
Arvell - M
Aibel - M
Atiyyah - F
Adlie - F
Anyely - F
Aamoni - F
Ahman - M
Arlane - F
Armoney - F
Atzhiry - F
Antonette - F
Akeelah - F
Abdikadir - M
Arinze - M
Arshaun - M
Alexandro - M
Ayriauna - F
Aqib - M
Alleya - F
Aavah - F
Anesti - Unisex
Adalaide - F
Analena - F
Alaeyah - F
Albena - F
Aimi - F
Adwaith - M
Arkady - M
Astyn - Unisex
Adelee - F
Agata - F
Alegna - F
Altan - M
Ahnaleigh - F
Algie - Unisex
Ashanti - F
Aislyn - F
Adaleine - F
Anthnoy - M
Algernon - M
Aeryona - F
Adrinne - F
Addell - F
Avril - F
Ahni - F
Aimon - M
Adolpho - M
Ahuva - F
Aurielle - F
Aveana - F
Aliyia - F
Alesander - M
Adnrea - F
Anjae - F
Alvine - F
Adorah - F
Adlemi - F
Alesi - F
Alontae - M
Antonny - M
Adarah - F
Ayreanna - F
Antyon - M
Andia - F
Ashla - F
Aspyn - F
Antwanett - F
Aundreia - F
Audella - F
Amari - Unisex
Arsha - Unisex
Aricella - F
Adan - M
Apasra - F
Alaysha - F
Anderson - M
Aurelius - M
Aerial - F
Averlei

### 결과 확인

In [38]:
# Open a fresh cursor and read back everything that was loaded.
cur = get_Redshift_connection(user, password)
cur.execute("select * from leemingyu05.name_gender")
result = cur.fetchall()

# Raw dump of the fetched rows (a list of tuples).
print(result)

# Wrap the rows in a DataFrame and show the first few of them.
df = DataFrame(result, columns=['name', 'gender'])
df.head()


[('Adaleigh', 'F'), ('Amryn', 'Unisex'), ('Apurva', 'Unisex'), ('Aryion', 'M'), ('Alixia', 'F'), ('Alyssarose', 'F'), ('Arvell', 'M'), ('Aibel', 'M'), ('Atiyyah', 'F'), ('Adlie', 'F'), ('Anyely', 'F'), ('Aamoni', 'F'), ('Ahman', 'M'), ('Arlane', 'F'), ('Armoney', 'F'), ('Atzhiry', 'F'), ('Antonette', 'F'), ('Akeelah', 'F'), ('Abdikadir', 'M'), ('Arinze', 'M'), ('Arshaun', 'M'), ('Alexandro', 'M'), ('Ayriauna', 'F'), ('Aqib', 'M'), ('Alleya', 'F'), ('Aavah', 'F'), ('Anesti', 'Unisex'), ('Adalaide', 'F'), ('Analena', 'F'), ('Alaeyah', 'F'), ('Albena', 'F'), ('Aimi', 'F'), ('Adwaith', 'M'), ('Arkady', 'M'), ('Astyn', 'Unisex'), ('Adelee', 'F'), ('Agata', 'F'), ('Alegna', 'F'), ('Altan', 'M'), ('Ahnaleigh', 'F'), ('Algie', 'Unisex'), ('Ashanti', 'F'), ('Aislyn', 'F'), ('Adaleine', 'F'), ('Anthnoy', 'M'), ('Algernon', 'M'), ('Aeryona', 'F'), ('Adrinne', 'F'), ('Addell', 'F'), ('Avril', 'F'), ('Ahni', 'F'), ('Aimon', 'M'), ('Adolpho', 'M'), ('Ahuva', 'F'), ('Aurielle', 'F'), ('Aveana', 'F'),

Unnamed: 0,name,gender
0,Adaleigh,F
1,Amryn,Unisex
2,Apurva,Unisex
3,Aryion,M
4,Alixia,F
