# In [None]:
import boto3
import pandas as pd
from io import StringIO

def lambda_handler(event, context):
    """Merge two Airbnb CSV files from S3 and upload the selected columns.

    Downloads ``NYC_airbnb.csv`` and
    ``updated_airbnb_data_with_locations_subset.csv`` from the source bucket,
    left-joins them on ``id``, keeps a fixed list of columns, and writes the
    result as ``final_airbnb_dataset.csv`` to the destination bucket.

    Args:
        event: Lambda invocation payload; not read by this handler.
        context: Lambda runtime context; not read by this handler.

    Returns:
        A dict with ``statusCode`` 200 and a success message in ``body``.

    Raises:
        KeyError: If any expected column is absent from the merged data.
    """
    # Keys of the two source objects to combine
    source1 = 'NYC_airbnb.csv'
    source2 = 'updated_airbnb_data_with_locations_subset.csv'

    # Source and destination S3 bucket names
    source_bucket_name = 'group-1-final-project-sourcedata'
    destination_bucket_name = 'group-1-final-project-finaldata'

    # Initialize S3 client
    s3 = boto3.client('s3')

    # Fetch the files from S3
    response1 = s3.get_object(Bucket=source_bucket_name, Key=source1)
    response2 = s3.get_object(Bucket=source_bucket_name, Key=source2)

    # Read the content of the files
    content1 = response1['Body'].read().decode('utf-8')
    content2 = response2['Body'].read().decode('utf-8')

    # Convert the content to pandas DataFrames
    airbnb_df = pd.read_csv(StringIO(content1))
    additional_df = pd.read_csv(StringIO(content2))

    # Left join so every row of the first file is kept even when the second
    # file has no matching 'id'.
    # NOTE(review): if both files share columns other than 'id', pandas
    # renames them with '_x'/'_y' suffixes and the selection below will not
    # find them -- confirm against the actual schemas.
    merged_df = pd.merge(airbnb_df, additional_df, on='id', how='left')

    # Columns to keep in the final dataset. Each name needs its own comma:
    # adjacent string literals are silently concatenated by Python, which
    # previously fused 'room_type' and 'price' into one bogus column name.
    columns_to_select = [
        'id', 'name', 'host_id', 'host_name', 'neighbourhood_group',
        'neighbourhood', 'latitude', 'longitude', 'room_type', 'price',
        'minimum_nights', 'number_of_reviews', 'reviews_per_month',
        'calculated_host_listings_count', 'availability_365',
        'number_of_reviews_ltm', 'detailed_location',
    ]

    # Fail with an explicit message naming every absent column rather than
    # relying on the generic pandas indexing error.
    missing_columns = [
        column for column in columns_to_select
        if column not in merged_df.columns
    ]
    if missing_columns:
        raise KeyError(
            f"Columns missing from merged data: {missing_columns}"
        )

    # Selecting the columns from the dataset
    selected_df = merged_df[columns_to_select]

    # Convert the transformed DataFrame back to CSV format
    updated_csv = selected_df.to_csv(index=False)

    # Destination key under which the transformed data is stored
    destination_key = 'final_airbnb_dataset.csv'

    # Upload the transformed data to the destination S3 bucket
    s3.put_object(Body=updated_csv, Bucket=destination_bucket_name, Key=destination_key)

    # Return the response
    return {
        'statusCode': 200,
        'body': 'Transformation completed successfully!'
    }
