In [1]:
import pandas as pd
import shutil

In [2]:
# Define the source directory and the filenames of the files to copy
source_dir = 'Yearly_Data_Files'
file_names = ['Fuels_data.csv', 'Generators_variability.csv', 'Load_data.csv', '2021prices.csv', 'IntGenbyFuel2021_hourly.csv']

# Loop through each file and copy it to the current directory
for file_name in file_names:
    shutil.copy(f"{source_dir}/{file_name}", f"./{file_name}")

# Load each CSV file into a separate pandas DataFrame
df1 = pd.read_csv('Fuels_data.csv')
df2 = pd.read_csv('Generators_variability.csv')
df3 = pd.read_csv('Load_data.csv')
df4 = pd.read_csv('2021prices.csv')
df5 = pd.read_csv('IntGenbyFuel2021_hourly.csv')

# Define the start and end dates as strings in 'MM/DD' format
start_date = '01/01'
end_date = '12/31'

#create a 'Time_Index' column in df5, going from 1 to the length of df5
df5['Time_Index'] = range(1, len(df5) + 1)

# Create a new DataFrame column with the year 2021 of the Time_Index for each DataFrame
df1['Year'] = pd.to_datetime(df1['Time_Index'], unit='h').dt.year
df2['Year'] = pd.to_datetime(df2['Time_Index'], unit='h').dt.year
df3['Year'] = pd.to_datetime(df3['Time_Index'], unit='h').dt.year
df4['Year'] = pd.to_datetime(df4['Time_Index'], unit='h').dt.year
df5['Year'] = pd.to_datetime(df5['Time_Index'], unit='h').dt.year

# Add the year to the start and end dates and convert to datetime objects
start_date = pd.to_datetime(f"{df1['Year'].unique()[0]}/{start_date}")
end_date = pd.to_datetime(f"{df1['Year'].unique()[0]}/{end_date}")

# Filter each DataFrame to only include rows with dates and hour in the specified range
filtered_df1_without_timeindex_0 = df1[(pd.to_datetime(df1['Time_Index'], unit='h').dt.date >= start_date.date()) & 
                   (pd.to_datetime(df1['Time_Index'], unit='h').dt.date <= end_date.date()) &
                   (df1['Time_Index'] != 0)]
filtered_df1_with_timeindex_0 = df1[df1['Time_Index'] == 0]

# Append the row with Time_Index = 0 at the beginning of the filtered_df1 DataFrame
filtered_df1 = pd.concat([filtered_df1_with_timeindex_0, filtered_df1_without_timeindex_0])

# Sort each filtered DataFrame by the Time_Index column
sorted_df1 = filtered_df1.sort_values('Time_Index')
sorted_df2 = df2[(pd.to_datetime(df2['Time_Index'], unit='h').dt.date >= start_date.date()) & 
                   (pd.to_datetime(df2['Time_Index'], unit='h').dt.date <= end_date.date())].sort_values('Time_Index')
sorted_df3 = df3[(pd.to_datetime(df3['Time_Index'], unit='h').dt.date >= start_date.date()) & 
                   (pd.to_datetime(df3['Time_Index'], unit='h').dt.date <= end_date.date())].sort_values('Time_Index')
sorted_df4 = df4[(pd.to_datetime(df4['Time_Index'], unit='h').dt.date >= start_date.date()) &
                   (pd.to_datetime(df4['Time_Index'], unit='h').dt.date <= end_date.date())].sort_values('Time_Index')
sorted_df5 = df5[(pd.to_datetime(df5['Time_Index'], unit='h').dt.date >= start_date.date()) &
                    (pd.to_datetime(df5['Time_Index'], unit='h').dt.date <= end_date.date())].sort_values('Time_Index')

# Input the load parameters
# Voll
sorted_df3.iloc[0, 1] = 9000
# Demand_Segment
sorted_df3.iloc[0, 2] = 1
# Cost_of_Demand_Curtailment_per_MW
sorted_df3.iloc[0, 3] = 1
# Max_Demand_Curtailment
sorted_df3.iloc[0, 4] = 10000000000000000
# Rep_Periods
sorted_df3.iloc[0, 5] = 1
# Timesteps_per_Rep_Period
sorted_df3.iloc[0, 6] = len(sorted_df3)/sorted_df3.iloc[0, 5]
# Sub_Weights
sorted_df3.iloc[0, 7] = 1

# Overwrite each original CSV file with the sorted, filtered DataFrame
sorted_df1.to_csv('Fuels_data.csv', index=False)
sorted_df2.to_csv('Generators_variability.csv', index=False)
sorted_df3.to_csv('Load_data.csv', index=False)
sorted_df4.to_csv('2021prices.csv', index=False)
sorted_df5.to_csv('IntGenbyFuel2021_hourly.csv', index=False)

# Print the length of each DataFrame
print(f"Length of Fuels_data.csv: {len(sorted_df1)}")
print(f"Length of Generators_variability.csv: {len(sorted_df2)}")
print(f"Length of Load_data.csv: {len(sorted_df3)}")
print(f"2021prices.csv: {len(sorted_df4)}")
print(f"IntGenbyFuel2021_hourly.csv: {len(sorted_df5)}")

Length of Fuels_data.csv: 8760
Length of Generators_variability.csv: 8759
Length of Load_data.csv: 8759
2021prices.csv: 8759
IntGenbyFuel2021_hourly.csv: 8759


In [39]:
# Load each CSV file into a separate pandas DataFrame
df1 = pd.read_csv('Fuels_data.csv')
df2 = pd.read_csv('Generators_variability.csv')
df3 = pd.read_csv('Load_data.csv')
df4 = pd.read_csv('2021prices.csv')

# Define the start and end dates and hours as strings in 'MM/DD HH' format
start_date = '01/01 01'
end_date = '01/31 00'

# # Create a new DataFrame column with the year of the Time_Index for each DataFrame
# df1['Year'] = pd.to_datetime(df1['Time_Index'], format='%Y-%m-%d %H:%M:%S').dt.year
# df2['Year'] = pd.to_datetime(df2['Time_Index'], format='%Y-%m-%d %H:%M:%S').dt.year
# df3['Year'] = pd.to_datetime(df3['Time_Index'], format='%Y-%m-%d %H:%M:%S').dt.year
# df4['Year'] = pd.to_datetime(df4['Time_Index'], format='%Y-%m-%d %H:%M:%S').dt.year

# Add the year to the start and end dates and convert to datetime objects
start_date = pd.to_datetime(f"2021/{start_date}", format='%Y/%m/%d %H')
end_date = pd.to_datetime(f"2021/{end_date}", format='%Y/%m/%d %H')

# Filter each DataFrame to only include rows with dates and hours in the specified range
filtered_df1_without_timeindex_0 = df1[(df1['Time_Index'] >= int(start_date.strftime('%Y%m%d%H'))) &
                                       (df1['Time_Index'] <= int(end_date.strftime('%Y%m%d%H'))) &
                                       (df1['Time_Index'] != 0)]
filtered_df1_with_timeindex_0 = df1[df1['Time_Index'] == 0]

# Append the row with Time_Index = 0 at the beginning of the filtered_df1 DataFrame
filtered_df1 = pd.concat([filtered_df1_with_timeindex_0, filtered_df1_without_timeindex_0])

# Sort each filtered DataFrame by the Time_Index column
sorted_df1 = filtered_df1.sort_values('Time_Index')
sorted_df2 = df2[(pd.to_datetime(df2['Time_Index'], format='%Y-%m-%d %H:%M:%S') >= start_date) & 
                   (pd.to_datetime(df2['Time_Index'], format='%Y-%m-%d %H:%M:%S') <= end_date)].sort_values('Time_Index')
sorted_df3 = df3[(pd.to_datetime(df3['Time_Index'], format='%Y-%m-%d %H:%M:%S') >= start_date) & 
                   (pd.to_datetime(df3['Time_Index'], format='%Y-%m-%d %H:%M:%S') <= end_date)].sort_values('Time_Index')
sorted_df4 = df4[(pd.to_datetime(df4['Time_Index'], format='%Y-%m-%d %H:%M:%S') >= start_date) &
                   (pd.to_datetime(df4['Time_Index'], format='%Y-%m-%d %H:%M:%S') <= end_date)].sort_values('Time_Index')

# Input the load parameters
# Voll
sorted_df3.iloc[0, 1] = 9000
# Demand_Segment
sorted_df3.iloc[0, 2] = 1
# Cost_of_Demand_Curtailment_per_MW
sorted_df3.iloc[0, 3] = 1
# Max_Demand_Curtailment
sorted_df3.iloc[0, 4] = 1
# Rep_Periods
sorted_df3.iloc[0, 5] = 1
# Timesteps_per_Rep_Period
sorted_df3.iloc[0, 6] = len(sorted_df3) / sorted_df3.iloc[0, 5]
# Sub_Weights
sorted_df3.iloc[0, 7] = 1

# Overwrite each original CSV file with the sorted, filtered DataFrame
sorted_df1.to_csv('Fuels_data.csv', index=False)
sorted_df2.to_csv('Generators_variability.csv', index=False)
sorted_df3.to_csv('Load_data.csv', index=False)
sorted_df4.to_csv('2021prices.csv', index=False)

# Print the length of each DataFrame
print(f"Length of Fuels_data.csv: {len(sorted_df1)}")
print(f"Length of Generators_variability.csv: {len(sorted_df2)}")
print(f"Length of Load_data.csv: {len(sorted_df3)}")
print(f"Length of 2021prices.csv: {len(sorted_df4)}")

ValueError: time data "1" doesn't match format "%Y-%m-%d %H:%M:%S", at position 0. You might want to try:
    - passing `format` if your strings have a consistent format;
    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.