In [2]:
# Import the pandas library for data manipulation
import pandas as pd

# Define the file paths for the input CSV files.
# Forward slashes are used on purpose: inside a normal string literal a
# backslash starts an escape sequence ("\G" and "\h" are invalid escapes that
# Python warns about), and a backslash separator only resolves on Windows.
# Forward slashes are accepted on Windows, Linux and macOS alike.
fear_greed_file = "csv_files/Greed_index.csv"
historical_data_file = "csv_files/historical_data.csv"

# Load the CSV files into pandas DataFrames.
# If either file is missing, print a hint naming both expected paths and then
# let the original FileNotFoundError propagate so execution stops here.
try:
    fear_greed_df = pd.read_csv(fear_greed_file)
    historical_data_df = pd.read_csv(historical_data_file)
except FileNotFoundError:
    print(f"Error: One of the files was not found. Please ensure both '{fear_greed_file}' and '{historical_data_file}' are in the correct directory.")
    # A bare `raise` re-raises the active exception with its traceback intact.
    raise

# --- Data Preprocessing for Merging ---
#
# The two inputs record time differently, so each frame is first parsed into
# real datetimes and then given a 'common_date' column (calendar date only)
# that serves as the shared join key.

# Fear/greed index: 'date' is parsed as 'YYYY-MM-DD'.
fear_greed_df["date"] = pd.to_datetime(
    fear_greed_df["date"],
    format="%Y-%m-%d",
)
fear_greed_df["common_date"] = fear_greed_df["date"].dt.date

# Historical data: 'Timestamp IST' is parsed as 'DD-MM-YYYY HH:MM'; the
# time-of-day part is discarded when building the join key.
historical_data_df["Timestamp IST"] = pd.to_datetime(
    historical_data_df["Timestamp IST"],
    format="%d-%m-%Y %H:%M",
)
historical_data_df["common_date"] = historical_data_df["Timestamp IST"].dt.date

# --- Merge the DataFrames and clean up ---

# Inner-join the two frames on 'common_date': only dates present in BOTH
# frames survive. Any non-key column whose name occurs in both inputs is
# disambiguated with the '_fear' / '_hist' suffixes. The helper key column is
# dropped right away because it was only needed for the join.
merged_df = fear_greed_df.merge(
    historical_data_df,
    how="inner",
    on="common_date",
    suffixes=("_fear", "_hist"),
).drop(columns=["common_date"])

# Show the first few rows so the result can be checked by eye.
print("Preview of the merged DataFrame:")
print(merged_df.head())

# --- Save the merged DataFrame to a new CSV file ---

# Destination of the merged data; adjust the path/name as needed.
output_file = "merged_data.csv"

# Write the frame out; index=False keeps the row index out of the file.
merged_df.to_csv(path_or_buf=output_file, index=False)

print(f"\nSuccessfully merged the files and saved the result to '{output_file}'")


Preview of the merged DataFrame:
    timestamp  value classification       date  \
0  1682919000     63          Greed 2023-05-01   
1  1682919000     63          Greed 2023-05-01   
2  1682919000     63          Greed 2023-05-01   
3  1701754200     75  Extreme Greed 2023-12-05   
4  1701754200     75  Extreme Greed 2023-12-05   

                                      Account Coin  Execution Price  \
0  0x3998f134d6aaa2b6a5f723806d00fd2bbbbce891  ETH           1897.9   
1  0x3998f134d6aaa2b6a5f723806d00fd2bbbbce891  ETH           1897.9   
2  0x3998f134d6aaa2b6a5f723806d00fd2bbbbce891  ETH           1898.6   
3  0xb1231a4a2dd02f2276fa3c5e2a2f3436e6bfed23  ETH           2230.1   
4  0xb1231a4a2dd02f2276fa3c5e2a2f3436e6bfed23  ETH           2230.1   

   Size Tokens  Size USD Side       Timestamp IST  Start Position  Direction  \
0       0.0967    183.53  BUY 2023-05-01 01:06:00          0.0000  Open Long   
1       0.0824    156.39  BUY 2023-05-01 01:06:00          0.0967  Open Long   