### The purpose of this notebook is to extract, transform, and load (ETL) the data from the CSV files into the database.

In [7]:
!pip install sqlalchemy

Collecting sqlalchemy
  Downloading SQLAlchemy-2.0.16-cp39-cp39-win_amd64.whl (2.0 MB)
     ---------------------------------------- 0.0/2.0 MB ? eta -:--:--
     ----- ---------------------------------- 0.3/2.0 MB 5.4 MB/s eta 0:00:01
     ---------------- ----------------------- 0.8/2.0 MB 10.4 MB/s eta 0:00:01
     ------------------------------ --------- 1.5/2.0 MB 10.5 MB/s eta 0:00:01
     ---------------------------------------- 2.0/2.0 MB 11.5 MB/s eta 0:00:00
Collecting greenlet!=0.4.17
  Downloading greenlet-2.0.2-cp39-cp39-win_amd64.whl (192 kB)
     ---------------------------------------- 0.0/192.1 kB ? eta -:--:--
     ------------------------------------- 192.1/192.1 kB 11.4 MB/s eta 0:00:00
Installing collected packages: greenlet, sqlalchemy
Successfully installed greenlet-2.0.2 sqlalchemy-2.0.16


In [8]:
import pandas as pd
from sqlalchemy import create_engine

In [3]:
# Location of the source CSV (a relative path)
csv_file = 'data/AB_NYC_2019.csv'

# Load the CSV into a DataFrame, then preview its first five rows
df = pd.read_csv(filepath_or_buffer=csv_file)
df.head(n=5)

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,2539,Clean & quiet apt home by the park,2787,John,Brooklyn,Kensington,40.64749,-73.97237,Private room,149,1,9,2018-10-19,0.21,6,365
1,2595,Skylit Midtown Castle,2845,Jennifer,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225,1,45,2019-05-21,0.38,2,355
2,3647,THE VILLAGE OF HARLEM....NEW YORK !,4632,Elisabeth,Manhattan,Harlem,40.80902,-73.9419,Private room,150,3,0,,,1,365
3,3831,Cozy Entire Floor of Brownstone,4869,LisaRoxanne,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89,1,270,2019-07-05,4.64,1,194
4,5022,Entire Apt: Spacious Studio/Loft by central park,7192,Laura,Manhattan,East Harlem,40.79851,-73.94399,Entire home/apt,80,10,9,2018-11-19,0.1,1,0


In [10]:
# Show the column labels of the loaded DataFrame
df.columns

Index(['id', 'name', 'host_id', 'host_name', 'neighbourhood_group',
       'neighbourhood', 'latitude', 'longitude', 'room_type', 'price',
       'minimum_nights', 'number_of_reviews', 'last_review',
       'reviews_per_month', 'calculated_host_listings_count',
       'availability_365'],
      dtype='object')

In [None]:
# Create an SQLAlchemy engine for the SQLite database.
# The part of the URL after 'sqlite:///' is the *path* to the database file,
# relative to the current working directory (four slashes would make it an
# absolute path). echo=True logs every SQL statement the engine emits.
engine = create_engine('sqlite:///database.db', echo=True)

# Insert the DataFrame into the database, replacing 'my_table' if it exists.
# engine.begin() opens a transaction that is committed when the block exits
# normally, rolled back if an exception is raised, and the connection is
# closed in either case.
with engine.begin() as conn:
    df.to_sql('my_table', conn, if_exists='replace', index=False)

In [None]:
## With csv and sqlalchemy:

import csv
from sqlalchemy import create_engine, Table, Column, Integer, String, MetaData

# Read the CSV file and build one parameter dict per data row.
# newline='' lets the csv module handle line endings itself.
# NOTE(review): assumes data.csv has no header row; a non-numeric header in
# the third column would make int() raise ValueError. Confirm against the file.
with open('data.csv', 'r', newline='') as csv_file:
    rows = [
        {'column1': row[0], 'column2': row[1], 'column3': int(row[2])}
        for row in csv.reader(csv_file)
        if row  # csv.reader yields [] for blank lines; skip them
    ]

# Create an SQLAlchemy engine for the SQLite database file
engine = create_engine('sqlite:///database.db', echo=True)

# Define the table structure
metadata = MetaData()
my_table = Table('my_table', metadata,
                 Column('column1', String),
                 Column('column2', String),
                 Column('column3', Integer)
                 )

# Create the table in the database if it does not exist.
# NOTE(review): the pandas cell earlier in this notebook writes a table with the
# same name ('my_table') to the same database.db; if that table already exists,
# create_all() leaves it as-is. Confirm which schema is intended.
metadata.create_all(engine)

# Insert all rows inside a single transaction. engine.begin() commits when the
# block exits normally, rolls back on error, and always closes the connection.
# (Without an explicit commit, SQLAlchemy 2.0 discards the inserts on close.)
with engine.begin() as conn:
    if rows:  # nothing to insert for an empty file
        conn.execute(my_table.insert(), rows)

In [None]:
## With csv and sqlite3:

import csv
import sqlite3
from contextlib import closing

CREATE_TABLE_SQL = '''CREATE TABLE IF NOT EXISTS my_table (
                    column1 TEXT,
                    column2 TEXT,
                    column3 INTEGER
                )'''

INSERT_ROW_SQL = '''INSERT INTO my_table (column1, column2, column3)
                      VALUES (?, ?, ?)'''

# Read the CSV file and build one parameter tuple per data row.
# newline='' lets the csv module handle line endings itself.
# NOTE(review): assumes data.csv has no header row; a non-numeric header in
# the third column would make int() raise ValueError. Confirm against the file.
with open('data.csv', 'r', newline='') as csv_file:
    rows = [
        (row[0], row[1], int(row[2]))
        for row in csv.reader(csv_file)
        if row  # csv.reader yields [] for blank lines; skip them
    ]

# closing() guarantees conn.close() even on error; sqlite3's own connection
# context manager only commits or rolls back, it does not close.
with closing(sqlite3.connect('database.db')) as conn:
    with conn:  # commit on success, roll back if an exception is raised
        cursor = conn.cursor()

        # Create a table in the database
        cursor.execute(CREATE_TABLE_SQL)

        # Insert all rows with a single parameterized statement
        cursor.executemany(INSERT_ROW_SQL, rows)