In [1]:
import boto3
import pandas as pd
from io import StringIO

def read_csv_from_s3(bucket_name, file_key, encoding='utf-8', **read_csv_kwargs):
    """Download a CSV object from S3 and parse it into a pandas DataFrame.

    Parameters
    ----------
    bucket_name : str
        Name of the S3 bucket that holds the object.
    file_key : str
        Key of the CSV object inside the bucket.
    encoding : str, optional
        Text encoding used to decode the downloaded bytes. Defaults to
        ``'utf-8'``, which matches the previous hard-coded behaviour.
    **read_csv_kwargs
        Extra keyword arguments forwarded unchanged to ``pandas.read_csv``
        (for example ``parse_dates=['date']`` or ``usecols=[...]``).

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV object.

    Raises
    ------
    UnicodeDecodeError
        If the object's bytes are not valid in ``encoding``.

    Any exception raised by the S3 client or by ``pandas.read_csv`` is
    propagated to the caller unchanged.
    """
    # Create an S3 client
    s3_client = boto3.client('s3')

    # Fetch the object; the payload is exposed as a stream under 'Body'
    s3_object = s3_client.get_object(Bucket=bucket_name, Key=file_key)
    body = s3_object['Body']
    try:
        file_content = body.read().decode(encoding)
    finally:
        # Always release the response stream, even if read/decode fails.
        body.close()

    # Parse the decoded text into a DataFrame
    return pd.read_csv(StringIO(file_content), **read_csv_kwargs)

# Where the CSV lives in S3: bucket first, then the object key
bucket_name = 'sagemaker-bucket-ds'
file_key = '01_STOCKS/DATA/JOINED/joined_dataset.csv'

# Download the object and parse it into the working DataFrame
df = read_csv_from_s3(bucket_name=bucket_name, file_key=file_key)

         date  ^spx_otwarcie_x  ^spx_najwyzszy_x  ^spx_najnizszy_x  \
0  2000-01-03          1469.25           1478.00           1438.36   
1  2000-01-04          1455.22           1455.22           1397.43   
2  2000-01-05          1399.42           1413.27           1377.68   
3  2000-01-06          1402.11           1411.90           1392.10   
4  2000-01-07          1403.45           1441.47           1400.73   

   ^spx_zamkniecie_x  ^spx_wolumen_x  aapl.us_otwarcie_x  aapl.us_najwyzszy_x  \
0            1455.22     517666667.0            0.798165             0.855889   
1            1399.42     560555556.0            0.823887             0.842031   
2            1402.11     603055556.0            0.789591             0.841433   
3            1403.45     606833333.0            0.807736             0.814416   
4            1441.47     680666667.0            0.734260             0.768656   

   aapl.us_najnizszy_x  aapl.us_zamkniecie_x  ...  mcd.us_otwarcie_y  \
0             0.7738

In [2]:
# Select the 'date' column plus every column whose name contains 'zamkniecie'.
# Matching is done on the lower-cased name; the original column order is kept.
filtered_columns = [
    name
    for name in df.columns
    for lowered in (name.lower(),)
    if lowered == 'date' or 'zamkniecie' in lowered
]
df_filtered = df[filtered_columns]

In [3]:
# Bare last expression: the notebook renders the filtered frame as this cell's output.
df_filtered

Unnamed: 0,date,^spx_zamkniecie_x,aapl.us_zamkniecie_x,^spx_zamkniecie_y,aapl.us_zamkniecie_y,mcd.us_zamkniecie_x,msft.us_zamkniecie_x,mcd.us_zamkniecie_y,msft.us_zamkniecie_y
0,2000-01-03,1455.22,0.851702,1455.22,0.851702,28.1546,41.0875,28.1546,41.0875
1,2000-01-04,1399.42,0.780021,1399.42,0.780021,27.5734,39.7047,27.5734,39.7047
2,2000-01-05,1402.11,0.791386,1402.11,0.791386,28.0229,40.1206,28.0229,40.1206
3,2000-01-06,1403.45,0.722995,1403.45,0.722995,27.6128,38.7774,27.6128,38.7774
4,2000-01-07,1441.47,0.757589,1441.47,0.757589,28.3307,39.2808,28.3307,39.2808
...,...,...,...,...,...,...,...,...,...
6032,2023-12-22,4754.63,193.600000,4754.63,193.600000,291.7000,374.5800,291.7000,374.5800
6033,2023-12-26,4774.75,193.050000,4774.75,193.050000,292.8600,374.6600,292.8600,374.6600
6034,2023-12-27,4781.58,193.150000,4781.58,193.150000,294.5500,374.0700,294.5500,374.0700
6035,2023-12-28,4783.35,193.580000,4783.35,193.580000,295.8400,375.2800,295.8400,375.2800
