In [None]:
#import necessary dependencies
import pandas as pd
import numpy as np
import os
import boto3
from botocore.exceptions import NoCredentialsError
import io
from io import StringIO
from dotenv import load_dotenv
import sys
import configparser


In [None]:
# Parse the local `.env` file with configparser; later cells read the
# credentials from its [AWS] section.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Fail here with a clear message instead of with
# an opaque KeyError when the AWS section is looked up.
if not config.read('.env'):
    raise FileNotFoundError(
        "Could not read '.env' from the working directory; "
        "it must provide an [AWS] section with access_key and secret_key"
    )

In [None]:
# Resolve the [AWS] section once, then read the credential pair from it.
aws_section = config['AWS']
aws_secret_key = aws_section['secret_key']
aws_access_key = aws_section['access_key']

In [None]:
# Load the raw listings CSV for each city from the local airbnb_listings folder.
# The names on the left pair up, in order, with the paths on the right.
(
    raw_dubai_data,
    raw_la_data,
    raw_london_data,
    raw_miami_data,
    raw_nyc_data,
    raw_sanfransisco_data,
    raw_sydney_data,
    raw_tokyo_data,
    raw_toronto_data,
) = map(
    pd.read_csv,
    (
        "airbnb_listings/DubaiData.csv",
        "airbnb_listings/LAData.csv",
        "airbnb_listings/LondonData.csv",
        "airbnb_listings/MiamiData.csv",
        "airbnb_listings/NYCData.csv",
        "airbnb_listings/SanFransiscoData.csv",
        "airbnb_listings/SydneyData.csv",
        "airbnb_listings/TokyoData.csv",
        "airbnb_listings/TorontoData.csv",
    ),
)

In [None]:
# Build the S3 client shared by the bucket-creation and upload cells.
# The region is pinned to eu-west-2 so requests are sent to that regional
# endpoint no matter which default region the local AWS config or environment
# provides; a mismatched endpoint makes create_bucket reject an eu-west-2
# LocationConstraint.
s3_client = boto3.client(
    's3',
    region_name='eu-west-2',
    aws_access_key_id=aws_access_key,
    aws_secret_access_key=aws_secret_key,
)

In [None]:
# Create the bucket for the raw listings data in eu-west-2.
# S3 bucket names may only contain lowercase letters, digits, dots and
# hyphens, so the name is spelled with hyphens rather than underscores.
try:
    response = s3_client.create_bucket(
        Bucket='raw-airbnb-listings-data',
        CreateBucketConfiguration={
            'LocationConstraint': 'eu-west-2',
        },
    )
except s3_client.exceptions.BucketAlreadyOwnedByYou:
    # Outside us-east-1 S3 raises this when the bucket already belongs to the
    # caller; treat it as success so the notebook survives Restart & Run All.
    response = None
    print("Bucket 'raw-airbnb-listings-data' already exists in this account; skipping creation")
else:
    print(response)

In [None]:
#upload to S3 bucket
def upload_to_s3(df, Bucket, object_key, client=None):
    """Serialize a DataFrame to CSV in memory and upload it as one S3 object.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to upload; written as CSV without the index column.
    Bucket : str
        Name of the destination bucket.
    object_key : str
        Key of the object to create. It is used exactly as given; no file
        extension is appended.
    client : optional
        Object exposing ``upload_fileobj(fileobj, bucket, key)``. When omitted,
        the notebook-level ``s3_client`` is used.

    Returns
    -------
    None
        The outcome is reported with ``print``; exceptions raised during the
        upload are caught and printed rather than propagated.
    """
    try:
        if client is None:
            client = s3_client
        # Render to text first and encode explicitly so the upload buffer holds
        # bytes without relying on pandas accepting a binary handle in to_csv.
        payload = df.to_csv(index=False).encode("utf-8")
        client.upload_fileobj(io.BytesIO(payload), Bucket, object_key)
        # Report the key that was actually written (no implied ".csv" suffix).
        print(f"file uploaded to s3://{Bucket}/{object_key}")
    except NoCredentialsError:
        print("Credentials not available or incorrect")
    except Exception as e:
        print(f"Upload to s3://{Bucket}/{object_key} failed: {e}")
        

In [None]:
# Collect every city's raw frame under the name later used for its upload.
raw_data = dict(
    [
        ("dubaiData", raw_dubai_data),
        ("losAngelesData", raw_la_data),
        ("londonData", raw_london_data),
        ("miamiData", raw_miami_data),
        ("newYorkCityData", raw_nyc_data),
        ("sanFranciscoData", raw_sanfransisco_data),
        ("sydneyData", raw_sydney_data),
        ("tokyoData", raw_tokyo_data),
        ("torontoData", raw_toronto_data),
    ]
)

In [None]:
# Upload each raw frame under the "raw-data/" prefix of the target bucket.
# A bucket name cannot contain "/" or "_", so the folder part belongs in the
# object key and the Bucket argument carries only the bucket name. The bucket
# must already exist when this cell runs.
bucket = "raw-airbnb-listings-data"
for df_name, df in raw_data.items():
    object_key = f"raw-data/{df_name}"
    upload_to_s3(df, bucket, object_key)
    