In [2]:
import csv
import random
import string
import time
from datetime import datetime

def generate_csv_files(num_rows, num_files):
    """
    Generate CSV files with specified number of rows and files.

    Args:
        num_rows (int): Number of rows per CSV file.
        num_files (int): Total number of CSV files to create.
    """
    # Define the columns
    columns = [
        "exported", "productid", "product", "price", "depacher", "arrival",
        "dateofcreation", "location", "age", "group", "usage", "product_age"
    ]

    for file_counter in range(1, num_files + 1):
        file_name = f"data_file_{file_counter}.csv"
        rows = []

        # Generate specified number of rows for the current file
        for _ in range(num_rows):
            row = {
                "exported": random.choice(["Yes", "No"]),
                "productid": ''.join(random.choices(string.ascii_uppercase + string.digits, k=10)),
                "product": f"Product-{random.randint(1, 500)}",
                "price": round(random.uniform(10.0, 1000.0), 2),
                "depacher": f"Dep-{random.randint(1, 50)}",
                "arrival": f"Arr-{random.randint(1, 50)}",
                "dateofcreation": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                "location": random.choice(["Mumbai", "Delhi", "Bangalore", "Pune"]),
                "age": random.randint(18, 60),
                "group": random.choice(["A", "B", "C", "D"]),
                "usage": random.choice(["Low", "Medium", "High"]),
                "product_age": random.randint(1, 10),
            }
            rows.append(row)

        # Write data to a CSV file
        with open(file_name, mode="w", newline="") as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=columns)
            writer.writeheader()
            writer.writerows(rows)

        print(f"Generated file: {file_name}")

        # Wait 10 seconds before generating the next file (if more files are to be created)
        if file_counter < num_files:
            time.sleep(10)

# Example usage: Create 5 CSV files with 100 rows each
generate_csv_files(num_rows=100, num_files=2)

Generated file: data_file_1.csv
Generated file: data_file_2.csv


In [13]:
import csv
import random
import string
import time
from datetime import datetime
import boto3
from botocore.exceptions import NoCredentialsError, PartialCredentialsError

def generate_csv_files_to_s3(num_rows, num_files, bucket_name, s3_folder, wait_time=10):
    """
    Generate CSV files and upload them to an S3 bucket.

    Args:
        num_rows (int): Number of rows per CSV file.
        num_files (int): Total number of CSV files to create.
        bucket_name (str): Name of the S3 bucket.
        s3_folder (str): Folder path within the S3 bucket where files will be stored.
        wait_time (int): Time to wait (in seconds) between generating files. Default is 10 seconds.

    Examples for wait_time:
        - 30 minutes: wait_time = 30 * 60  (1800 seconds)
        - 1 hour: wait_time = 60 * 60  (3600 seconds)
        - 4 hours: wait_time = 4 * 60 * 60  (14400 seconds)
        - 1 day: wait_time = 24 * 60 * 60  (86400 seconds)
    """
    # Define the columns
    columns = [
        "exported", "productid", "product", "price", "depacher", "arrival",
        "dateofcreation", "location", "age", "group", "usage", "product_age"
    ]

    # Initialize S3 client
    s3_client = boto3.client("s3")

    for file_counter in range(1, num_files + 1):
        file_name = f"data_file_{file_counter}.csv"
        rows = []

        # Generate specified number of rows for the current file
        for _ in range(num_rows):
            row = {
                "exported": random.choice(["Yes", "No"]),
                "productid": ''.join(random.choices(string.ascii_uppercase + string.digits, k=10)),
                "product": f"Product-{random.randint(1, 500)}",
                "price": round(random.uniform(10.0, 1000.0), 2),
                "depacher": f"Dep-{random.randint(1, 50)}",
                "arrival": f"Arr-{random.randint(1, 50)}",
                "dateofcreation": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                "location": random.choice(["Mumbai", "Delhi", "Bangalore", "Pune"]),
                "age": random.randint(18, 60),
                "group": random.choice(["A", "B", "C", "D"]),
                "usage": random.choice(["Low", "Medium", "High"]),
                "product_age": random.randint(1, 10),
            }
            rows.append(row)

        # Write data to a local CSV file
        with open(file_name, mode="w", newline="") as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=columns)
            writer.writeheader()
            writer.writerows(rows)

        print(f"Generated file: {file_name}")

        # Upload the CSV file to the S3 bucket
        s3_key = f"{s3_folder}/{file_name}"
        try:
            s3_client.upload_file(file_name, bucket_name, s3_key)
            print(f"Uploaded {file_name} to s3://{bucket_name}/{s3_key}")
        except (NoCredentialsError, PartialCredentialsError) as e:
            print(f"Failed to upload {file_name} to S3: {e}")
        except Exception as e:
            print(f"An error occurred while uploading {file_name}: {e}")

        # Wait for the specified time before generating the next file (if more files are to be created)
        if file_counter < num_files:
            print(f"Waiting {wait_time} seconds before generating the next file...")
            time.sleep(wait_time)

# Example usage: Generate 2 CSV files with 100 rows each and upload them to S3
generate_csv_files_to_s3(
    num_rows=1000, 
    num_files=1, 
    bucket_name="yash-de", 
    s3_folder="generated_data",
    wait_time=10
)

PartialCredentialsError: Partial credentials found in env, missing: AWS_SECRET_ACCESS_KEY

In [1]:
import os
import sys
import boto3
import pandas as pd
import numpy as np
from dotenv import load_dotenv
load_dotenv()

class AWS_S3Manager:
    def __init__(self, bucket_name, aws_access_key_id, aws_secret_access_key, region_name):
        try:
            if not bucket_name or not aws_access_key_id or not aws_secret_access_key or not region_name:
                raise ValueError("All AWS credentials and bucket name must be provided.")

            self.bucket_name = bucket_name
            self.s3_client = boto3.client(
                service_name='s3',
                region_name=region_name,
                aws_access_key_id=aws_access_key_id,
                aws_secret_access_key=aws_secret_access_key
            )
            # Check if bucket exists
            self.s3_client.head_bucket(Bucket=self.bucket_name)
            print("Connected to AWS S3 bucket")
        except self.s3_client.exceptions.NoSuchBucket:
            raise Exception(f"The bucket '{bucket_name}' does not exist.", sys)
        except Exception as e:
            raise Exception(e, sys)

    def upload_file(self, local_filename, s3_key):
        """
        Upload file to S3 bucket.
        Args:
            local_filename (str): Local file path.
            s3_key (str): S3 object key.
        """
        try:
            if not os.path.exists(local_filename):
                raise FileNotFoundError(f"The file '{local_filename}' does not exist.")
            if not s3_key:
                raise ValueError("S3 key cannot be empty.")

            self.s3_client.upload_file(local_filename, self.bucket_name, s3_key)
            print(f"File '{local_filename}' uploaded to S3 bucket as '{s3_key}'.")
        except Exception as e:
            raise Exception(e, sys)

    def upload_dataframe_to_s3(self, df, s3_key):
        """
        Uploads a Pandas DataFrame to the S3 bucket with the specified key.
        Args:
            df (pd.DataFrame): The DataFrame to upload.
            s3_key (str): The S3 key (object key) under which to store the DataFrame.
        """
        try:
            if not isinstance(df, pd.DataFrame):
                raise ValueError("The provided object is not a valid Pandas DataFrame.")
            if df.empty:
                raise ValueError("The DataFrame is empty and cannot be uploaded.")
            if not s3_key:
                raise ValueError("S3 key cannot be empty.")

            # Convert DataFrame to CSV format
            csv_buffer = df.to_csv(index=False).encode('utf-8')

            # Upload the CSV data to S3
            self.s3_client.put_object(Bucket=self.bucket_name, Key=s3_key, Body=csv_buffer)
            print(f"DataFrame uploaded to S3 bucket as '{s3_key}'.")
        except Exception as e:
            raise Exception(e, sys)

    def download_file(self, s3_key, local_filename, target_directory="data"):
        """
        Download file from S3 bucket.
        Args:
            s3_key (str): S3 object key.
            local_filename (str): Local file name to save as.
            target_directory (str): Directory to save the file.
        """
        try:
            if not s3_key:
                raise ValueError("S3 key cannot be empty.")
            if not local_filename:
                raise ValueError("Local filename cannot be empty.")
            
            if not os.path.exists(target_directory):
                os.makedirs(target_directory)  # Create the target directory if it doesn't exist

            # Construct the full local path
            local_path = os.path.join(target_directory, local_filename)

            # Check if the object exists in S3
            self.s3_client.head_object(Bucket=self.bucket_name, Key=s3_key)

            # Download the file
            self.s3_client.download_file(self.bucket_name, s3_key, local_path)
            print(f"File '{s3_key}' downloaded from S3 bucket and saved as '{local_path}'.")
        except self.s3_client.exceptions.NoSuchKey:
            raise Exception(f"The key '{s3_key}' does not exist in the bucket.", sys)
        except Exception as e:
            print("Error")

    def read_csv_from_s3(self, s3_key):
        """
        Reads CSV file from S3 bucket.
        Args:
            s3_key (str): S3 object key.
        Returns:
            pd.DataFrame: DataFrame containing the data.
        """
        try:
            if not s3_key:
                raise ValueError("S3 key cannot be empty.")
            
            # Check if the object exists in S3
            self.s3_client.head_object(Bucket=self.bucket_name, Key=s3_key)

            obj = self.s3_client.get_object(Bucket=self.bucket_name, Key=s3_key)
            df = pd.read_csv(obj['Body'])
            return df
        except self.s3_client.exceptions.NoSuchKey:
            raise Exception(f"The key '{s3_key}' does not exist in the bucket.", sys)
        except Exception as e:
            print("Error")

    def move_data(self, source_folder, destination_folder):
        """
        Moves data in S3 bucket from source_folder to destination_folder.
        Args:
            source_folder (str): Source folder path in S3.
            destination_folder (str): Destination folder path in S3.
        """
        try:
            if not source_folder or not destination_folder:
                raise ValueError("Source and destination folders cannot be empty.")

            # List objects in the source folder
            response = self.s3_client.list_objects_v2(Bucket=self.bucket_name, Prefix=source_folder)
            if 'Contents' in response:
                for obj in response['Contents']:
                    if not obj['Key'].endswith('/'):  # Ignore folders
                        # Construct the new key destination path
                        new_key = obj['Key'].replace(source_folder, destination_folder, 1)
                        # Copy the object to the new location
                        self.s3_client.copy_object(
                            Bucket=self.bucket_name,
                            CopySource={'Bucket': self.bucket_name, 'Key': obj['Key']},
                            Key=new_key
                        )
                        # Delete the original object
                        self.s3_client.delete_object(Bucket=self.bucket_name, Key=obj['Key'])
                        print(f"Moved {obj['Key']} to {new_key}")
            else:
                print(f"No objects found in {source_folder}.")
        except Exception as e:
            print(f"An error occurred while moving data: {e}")

In [2]:
aws_manager = AWS_S3Manager(
    bucket_name=os.getenv("S3_BUCKET_NAME"),
    aws_access_key_id=os.getenv("AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY_ID"),
    region_name=os.getenv("AWS_REGION_NAME")
)

Connected to AWS S3 bucket


In [3]:
df = aws_manager.read_csv_from_s3(
    s3_key="generated_data/data_file_1.csv"
)

In [4]:
df.head()

Unnamed: 0,exported,productid,product,price,depacher,arrival,dateofcreation,location,age,group,usage,product_age
0,No,HFMMW7ZDAD,Product-83,824.57,Dep-46,Arr-19,2025-01-23 17:41:06,Delhi,38,B,Low,4
1,No,CE78DVX34S,Product-224,832.19,Dep-13,Arr-32,2025-01-23 17:41:06,Pune,40,D,High,3
2,No,L67C3E0V55,Product-465,604.03,Dep-42,Arr-13,2025-01-23 17:41:06,Mumbai,59,A,Low,4
3,Yes,YQAXEY1M9X,Product-357,726.1,Dep-20,Arr-13,2025-01-23 17:41:06,Delhi,20,B,High,1
4,Yes,4WZY3XWNU8,Product-237,271.35,Dep-43,Arr-41,2025-01-23 17:41:06,Delhi,48,C,Low,7


In [9]:
import csv
import random
import string
import time
from datetime import datetime
def process_data(dataframe):
    def transform_data(df_to_transform):
        current_time = datetime.now()
        df_to_transform['transformation_date'] = current_time.strftime('%Y-%m-%d')
        df_to_transform['transformation_time'] = current_time.strftime('%H:%M:%S')
        
        # Add transformations
        df_to_transform['price_category'] = df_to_transform['price'].apply(
            lambda x: 'Low' if x < 200 else 'Medium' if x < 600 else 'High'
        )
        df_to_transform['transit_time'] = df_to_transform.apply(
            lambda row: abs(int(row['depacher'].split('-')[-1]) - int(row['arrival'].split('-')[-1])), axis=1
        )
        df_to_transform['normalized_product_age'] = (
            (df_to_transform['product_age'] - df_to_transform['product_age'].min()) /
            (df_to_transform['product_age'].max() - df_to_transform['product_age'].min())
        )
        df_to_transform['region'] = df_to_transform['location'].apply(
            lambda loc: 'North' if loc in ['Delhi', 'Mumbai'] else 'South'
        )
        df_to_transform['export_flag'] = 1
        df_to_transform['high_value_flag'] = df_to_transform.apply(
            lambda row: 1 if row['price'] > 600 and row['usage'] == 'High' else 0, axis=1
        )
        df_to_transform['age_group'] = df_to_transform['age'].apply(
            lambda age: 'Youth' if age < 30 else 'Adult' if age < 50 else 'Senior'
        )
        return df_to_transform

    # Split the data into two DataFrames
    exported_yes_df = dataframe[dataframe['exported'] == 'Yes'].copy()
    exported_no_df = dataframe[dataframe['exported'] == 'No'].copy()

    # Transform the `exported == Yes` DataFrame
    transformed_exported_yes_df = transform_data(exported_yes_df)

    # Simulate processing `exported == No` after 5 minutes
    print("Waiting 5 minutes to process 'exported == No' rows...")
    time.sleep(60)  # Simulates 5 minutes delay

    # Update `exported == No` DataFrame and process
    current_time = datetime.now()
    exported_no_df['exported'] = 'Yes'
    exported_no_df['dateofcreation'] = current_time.strftime('%Y-%m-%d %H:%M:%S')

    # Apply transformations to `exported == No`
    transformed_exported_no_df = transform_data(exported_no_df)

    # Concatenate the transformed DataFrames
    final_df = pd.concat([transformed_exported_yes_df, transformed_exported_no_df])
    
    finaldf = final_df.sample(frac=1).reset_index(drop=True)

    return final_df

In [10]:
final_df = process_data(df)
final_df.head()

Waiting 5 minutes to process 'exported == No' rows...


Unnamed: 0,exported,productid,product,price,depacher,arrival,dateofcreation,location,age,group,...,product_age,transformation_date,transformation_time,price_category,transit_time,normalized_product_age,region,export_flag,high_value_flag,age_group
3,Yes,YQAXEY1M9X,Product-357,726.1,Dep-20,Arr-13,2025-01-23 17:41:06,Delhi,20,B,...,1,2025-01-24,21:40:52,High,7,0.0,North,1,1,Youth
4,Yes,4WZY3XWNU8,Product-237,271.35,Dep-43,Arr-41,2025-01-23 17:41:06,Delhi,48,C,...,7,2025-01-24,21:40:52,Medium,2,0.666667,North,1,0,Adult
5,Yes,213HYDA0XS,Product-493,150.1,Dep-17,Arr-5,2025-01-23 17:41:06,Mumbai,55,C,...,6,2025-01-24,21:40:52,Low,12,0.555556,North,1,0,Senior
6,Yes,2XLMOBBF6Y,Product-48,954.71,Dep-12,Arr-15,2025-01-23 17:41:06,Bangalore,31,C,...,3,2025-01-24,21:40:52,High,3,0.222222,South,1,0,Adult
8,Yes,9N0OOXJ4IM,Product-110,334.03,Dep-30,Arr-37,2025-01-23 17:41:06,Mumbai,28,D,...,7,2025-01-24,21:40:52,Medium,7,0.666667,North,1,0,Youth


In [11]:
import boto3
import pandas as pd

def save_dataframe_to_s3(
    df, 
    bucket_name="your-s3-bucket", 
    aws_access_key=None,
    aws_secret_key=None,
    folder="transform_json_de_products_data",
):
    """
    Save the provided DataFrame directly as JSON to S3.
    The file name is dynamically determined by the existing files in the S3 folder.
    """
    # Initialize S3 client
    s3_client = boto3.client(
        "s3",
        aws_access_key_id=aws_access_key, 
        aws_secret_access_key=aws_secret_key,
    )
    
    # Get the existing files in the specified S3 folder
    existing_files = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=folder).get('Contents', [])
    
    # If the folder is empty, existing_files will be empty, handle this case
    if existing_files:
        # Extract file names to count how many files already exist
        existing_files = [f['Key'] for f in existing_files]
    else:
        existing_files = []

    # Determine the next file number (e.g., data_file1.json, data_file2.json, ...)
    file_number = len(existing_files) + 1
    json_filename = f"{folder}/data_file{file_number}.json"

    # Convert the DataFrame to JSON string (in records format, one line per record)
    json_data = df.to_json(orient="records", lines=True)

    # Upload the JSON string directly to S3
    s3_client.put_object(Bucket=bucket_name, Key=json_filename, Body=json_data)
    print(f"Data saved to S3 bucket '{bucket_name}' at location '{json_filename}'")

    return json_filename

In [12]:
final_df = final_df  
bucket_name = 'yash-de'  # Replace with your actual S3 bucket name
json_file = save_dataframe_to_s3(
    final_df, 
    bucket_name=bucket_name,
    aws_access_key=os.getenv("AWS_ACCESS_KEY_ID"),
    aws_secret_key=os.getenv("AWS_SECRET_ACCESS_KEY_ID"),
    )

Data saved to S3 bucket 'yash-de' at location 'transform_json_de_products_data/data_file2.json'


In [28]:
import psycopg2
from psycopg2.extras import RealDictCursor
import pandas as pd
from io import StringIO
import textwrap
import openpyxl

class PostgresConnector:
    def __init__(self, connection) -> None:
        """
        Initializes the PostgresConnector with a connection and sets the cursor to use RealDictCursor.

        Args:
            connection (dict): A dictionary containing the connection parameters for psycopg2.
        """
        self.conn = psycopg2.connect(**connection)
        self.cursor = self.conn.cursor(cursor_factory=RealDictCursor)  
        self._set_timezone_utc()

    def _set_timezone_utc(self):
        """
        Set the timezone for the database connection to UTC.
        """
        if self.conn:
            self.cursor.execute("SET TIME ZONE 'UTC';")
            
    def execute_cr_table_query(self, query):
        """
        Execute a single query.
        """
        try:
            if self.cursor:
                self.cursor.execute(query)
                self.conn.commit()
                print("Query executed successfully.")
            else:
                print("No active database connection.")
        except psycopg2.Error as e:
            print("Error executing query:", e)

    def execute_query(self, query, params=None, print_query: bool = True):
        """
        Execute a SQL query and return the results as a list of dictionaries where each dictionary
        represents a row with column names as keys.

        Args:
            query (str): The SQL query string to be executed.
            params (tuple, optional): parameters to pass with the query.
            print_query (bool, optional): For printing query on the terminal.

        Returns:
            list: A list of dictionaries, where each dictionary represents a row with column names as keys,
                  if the query is a SELECT statement.
            int: The number of affected rows for non-SELECT queries.
        """
        try:
            if print_query:
                print("\n", textwrap.dedent(query))
            self.cursor.execute(query, params)
            if query.strip().upper().startswith(("SELECT", "WITH")):
                result = self.cursor.fetchall()
                result_list = [dict(row) for row in result]
                return result_list
            else:
                affected_rows = self.cursor.rowcount
                self.conn.commit()
                return affected_rows
        except Exception as e:
            self.conn.rollback()
            print("\n", textwrap.dedent(query))
            raise e

    def close(self):
        """
        Closes the database connection and cursor.
        """
        self.cursor.close()
        self.conn.close()

    def copy_from_csv(self, csv_buffer, schema_name, table_name) -> None:
        """
        Copy data from a CSV buffer to a database table.

        Args:
            csv_buffer (Any): A file-like object containing CSV data.
            schema_name (str): The name of the schema containing the table.
            table_name (str): The name of the table to copy data into.

        Raises:
            psycopg2.Error: If there's an error during the copy operation.
        """
        copy_query = (
            f"COPY {schema_name}.{table_name} FROM STDIN WITH CSV HEADER DELIMITER ',' NULL AS ''"
        )
        try:
            self.cursor.copy_expert(copy_query, csv_buffer)
            self.conn.commit()
        except psycopg2.Error as e:
            self.conn.rollback()
            raise e

    def insert_from_excel(self, schema_name, table_name, file_path):
        """
        Create a table and insert data from an Excel file into the specified schema and table.
        """
        try:
            if not self.cursor:
                print("No active database connection.")
                return

            # Load Excel file
            workbook = openpyxl.load_workbook(file_path)
            sheet = workbook.active

            # Extract headers and create table columns
            columns = [cell.value for cell in sheet[1]]
            column_definitions = ', '.join([f'"{col}" TEXT' for col in columns])

            # Create schema if not exists
            self.execute_query(f"CREATE SCHEMA IF NOT EXISTS {schema_name};")

            # Create table if not exists
            create_table_query = f"""
                CREATE TABLE IF NOT EXISTS {schema_name}.{table_name} (
                    {column_definitions}
                );
            """
            self.execute_query(create_table_query)

            # Prepare insert query
            placeholders = ', '.join(['%s' for _ in columns])
            quoted_columns = ', '.join([f'"{col}"' for col in columns])
            insert_query = f"INSERT INTO {schema_name}.{table_name} ({quoted_columns}) VALUES ({placeholders});"

            # Insert data from Excel into the table
            for row in sheet.iter_rows(min_row=2, values_only=True):
                self.cursor.execute(insert_query, row)

            # Commit all changes
            self.conn.commit()
            print("Data inserted successfully from Excel.")

        except Exception as e:
            print("Error during Excel data insertion:", e)

    def copy_from_dataframe(self, df: pd.DataFrame, schema_name: str, table_name: str) -> None:
        """
        Copy a pandas DataFrame directly into a PostgreSQL table.

        Args:
            df (pd.DataFrame): The pandas DataFrame to copy.
            schema_name (str): The schema name of the target table.
            table_name (str): The target table name.

        Raises:
            Exception: If any error occurs during the process.
        """
        try:
            if df.empty:
                print("DataFrame is empty. No data to copy.")
                return

            # Convert DataFrame to a CSV buffer
            csv_buffer = StringIO()
            df.to_csv(csv_buffer, index=False, header=True)
            csv_buffer.seek(0)

            # Use the copy_from_csv method to copy data into the database
            self.copy_from_csv(csv_buffer, schema_name, table_name)
            print("DataFrame copied successfully to the table.")

        except Exception as e:
            print("Error during DataFrame copy:", e)
            self.conn.rollback()
            raise e

In [34]:
CRED = {
    'database': '**********',
    'user': 'yashm',
    'password': '************',
    'host': '**********',
    'port': '5432'
}
print(CRED)

{'database': '**********', 'user': 'yashm', 'password': '************', 'host': '**********', 'port': '5432'}


In [30]:
pg_conn = PostgresConnector(CRED)

In [31]:
pg_conn.execute_query("CREATE SCHEMA IF NOT EXISTS ym_de_products;")


 CREATE SCHEMA IF NOT EXISTS ym_de_products;


-1

In [32]:
create_table_query = """
CREATE TABLE IF NOT EXISTS ym_de_products.product_table (
    exported TEXT,
    productid TEXT,
    product TEXT,
    price FLOAT8,
    depacher TEXT,
    arrival TEXT,
    dateofcreation DATE,
    location TEXT,
    age INT,
    "group" TEXT,
    usage TEXT,
    product_age INT,
    transformation_date DATE,
    transformation_time TIME,
    price_category TEXT,
    transit_time INT,
    normalized_product_age FLOAT8,
    region TEXT,
    export_flag BOOLEAN,
    high_value_flag BOOLEAN,
    age_group TEXT
);
"""
pg_conn.execute_cr_table_query(create_table_query)

Query executed successfully.


In [33]:
pg_conn.copy_from_dataframe(final_df, 'ym_de_products', 'product_table')

DataFrame copied successfully to the table.


In [None]:
aws_manager.move_data(
    source_folder="generated_data/data_file_1.csv", 
    destination_folder="pgenerated_data_archive_files/data_file_1.csv"
)