In [3]:
import pandas as pd 

In [8]:
# Build the two-row frame in one step. Appending placeholder rows to an empty
# frame and overwriting them afterwards is slower and leaves the dtype of
# 'Australia' up to pandas' row-append inference; here it is float64 from the start.
df = pd.DataFrame(
    {
        'Date': ['2025-12-01', '2025-01-01'],
        'Australia': [17116.58, 13787.12],
    }
)

In [10]:
# Display the two-row frame (columns 'Date' and 'Australia').
df

Unnamed: 0,Date,Australia
0,2025-12-01,17116.58
1,2025-01-01,13787.12


In [13]:
# Write the frame to 'File_to_add.csv' in the working directory; index=False
# leaves the row index out of the file.
df.to_csv('File_to_add.csv',index=False)

In [3]:
import sys
from src.logger import logging

def error_message_detail(error, error_detail=sys):
    """Format an error together with the script and line currently being handled.

    Args:
        error: Exception (or plain message); its ``str()`` is embedded in the text.
        error_detail: Object exposing ``exc_info()``. Defaults to the ``sys``
            module, so callers may omit it.

    Returns:
        A string naming the file and line taken from the traceback returned by
        ``error_detail.exc_info()``, followed by ``str(error)``. When no
        exception is being handled (traceback is ``None``) the file and line
        are reported as ``<unknown>`` instead of failing.
    """
    _, _, exc_tb = error_detail.exc_info()
    if exc_tb is None:
        # Called outside an ``except`` block: there is no frame to inspect.
        file_name = line_number = "<unknown>"
    else:
        file_name = exc_tb.tb_frame.f_code.co_filename
        line_number = exc_tb.tb_lineno

    return "Error occured in python script name [{0}] line number [{1}] error message[{2}]".format(
        file_name, line_number, str(error))


class CustomException(Exception):
    """Exception whose ``str()`` includes the script name and line number.

    ``error_detail`` defaults to ``sys`` so that ``CustomException('message')``
    works; previously the argument was required and a one-argument call raised
    ``TypeError`` instead of the intended exception.
    """

    def __init__(self, error_message, error_detail=sys):
        super().__init__(error_message)
        # Location is captured once, at construction time.
        self.error_message = error_message_detail(error_message, error_detail=error_detail)

    def __str__(self):
        return self.error_message

In [7]:
# Read the three previously transformed CSV files (CPI, ER, Exports), in that order.
transformed_CPI, transformed_ER, transformed_Exports = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Transformed_data/CPI_transformed.csv',
        'Transformed_data/ER_transformed.csv',
        'Transformed_data/Exports_transformed.csv',
    )
)

In [21]:
# Number of distinct 'Country' values in the transformed Exports data (as a 1-tuple).
transformed_Exports.Country.unique().shape

(215,)

In [20]:
# Number of distinct 'Country' values in the transformed CPI data (as a 1-tuple).
transformed_CPI.Country.unique().shape

(215,)

In [22]:
# Number of distinct 'Country' values in the transformed ER data (as a 1-tuple).
transformed_ER.Country.unique().shape

(217,)

In [4]:
# Read the three raw monthly CSV files (CPI, ER, Exports), in that order.
CPI_data, ER_data, Exports_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Monthly_data/CPI-monthly.csv',
        'Monthly_data/ER.csv',
        'Monthly_data/Exports.csv',
    )
)

In [26]:
# (rows, columns) of the monthly CPI table.
CPI_data.shape

(360, 173)

In [27]:
# (rows, columns) of the monthly ER table.
ER_data.shape

(360, 201)

In [28]:
# (rows, columns) of the monthly Exports table.
Exports_data.shape

(333, 55)

In [5]:
def rename_columns(df):
    """Replace every '0' in the column labels of ``df`` with 'na', in place.

    Some country names in the source files contain '0' where 'na' belongs.
    The replacement is applied to every '0' character of every string label;
    labels that are not strings are left unchanged.

    Args:
        df: DataFrame whose ``columns`` attribute is rewritten.

    Returns:
        The same ``df`` object, with corrected column labels.

    Raises:
        CustomException: If the labels cannot be rewritten. The underlying
            error is chained as ``__cause__``.
    """
    try:
        df.columns = [
            column.replace('0', 'na') if isinstance(column, str) else column
            for column in df.columns
        ]
    except Exception as err:
        # CustomException takes the exc_info provider as its second argument;
        # omitting it raised TypeError and hid the real failure.
        raise CustomException(
            'Error while renaming the countries in rename_columns() function', sys
        ) from err
    return df

In [6]:
def fill_missing_values(df):
    """Replace every missing value in ``df`` with 0, in place.

    Args:
        df: DataFrame to modify.

    Returns:
        The same ``df`` object, matching the other helpers in this notebook
        (previously the function returned ``None``).

    Raises:
        CustomException: If filling fails. The underlying error is chained as
            ``__cause__``.
    """
    try:
        # In-place on purpose: existing callers ignore the return value.
        df.fillna(0, inplace=True)
    except Exception as err:
        raise CustomException(
            'Error while filling the missing values in fill_missing_values() function', sys
        ) from err
    return df


In [15]:

def add_missing_countries(df, reference_df1, reference_df2):
    """Add to ``df``, in place, every column found only in the reference frames.

    New columns are filled with 0 and appended in a reproducible order: first
    the missing columns of ``reference_df1`` in their own order, then those of
    ``reference_df2``. (Taking the order from a set difference made the column
    layout vary between runs.)

    Args:
        df: DataFrame to extend.
        reference_df1: First frame whose columns should also exist in ``df``.
        reference_df2: Second frame whose columns should also exist in ``df``.

    Returns:
        The same ``df`` object, with the extra columns added.

    Raises:
        CustomException: If the columns cannot be added. The underlying error
            is chained as ``__cause__``.
    """
    try:
        seen = set(df.columns)
        missing = []
        for reference in (reference_df1, reference_df2):
            for column in reference.columns:
                if column not in seen:
                    seen.add(column)
                    missing.append(column)
        if missing:
            df[missing] = 0
    except Exception as err:
        raise CustomException(
            'Error while adding the missing countries in add_missing_countries() function', sys
        ) from err
    return df
        

In [34]:
# Correct the column labels of the three monthly tables.
CPI_data = rename_columns(CPI_data)
ER_data = rename_columns(ER_data)
Exports_data = rename_columns(Exports_data)
# Zero-fill missing values; these calls are made for their in-place effect.
fill_missing_values(CPI_data)
fill_missing_values(ER_data)
fill_missing_values(Exports_data)
# Give each table the columns that appear only in the other two.
# add_missing_countries modifies and returns the frame it receives, so each
# added_count_* name refers to the same object as the matching *_data variable.
added_count_CPI=add_missing_countries(CPI_data,ER_data,Exports_data)
added_count_ER=add_missing_countries(ER_data,CPI_data,Exports_data)
added_count_Exports=add_missing_countries(Exports_data,CPI_data,ER_data)

In [36]:
# (rows, columns) of the Exports table after the missing country columns were added.
added_count_Exports.shape

(333, 202)

In [38]:
# (rows, columns) of the CPI table after the missing country columns were added.
added_count_CPI.shape

(360, 202)

In [39]:
# (rows, columns) of the ER table after the missing country columns were added.
added_count_ER.shape

(360, 202)

None


In [None]:
def add_missing_dates(df, reference_df1, reference_df2):
    """Return ``df`` preceded by zero-filled rows for dates it lacks.

    A date is any value of the 'Year' column. Dates present in a non-empty
    reference frame but absent from ``df`` become new rows whose other columns
    are 0. The new rows come first, ordered by first appearance in
    ``reference_df1`` and then ``reference_df2`` (a set difference gave an
    order that varied between runs). The result gets a fresh 0..n-1 index so
    the new rows do not share index labels with the existing ones.

    Args:
        df: DataFrame with a 'Year' column; it is not modified.
        reference_df1: First frame supplying dates; skipped when empty.
        reference_df2: Second frame supplying dates; skipped when empty.

    Returns:
        A new DataFrame with the same columns as ``df``.

    Raises:
        CustomException: If the rows cannot be added (for example, a frame has
            no 'Year' column). The underlying error is chained as ``__cause__``.
    """
    try:
        seen = set(df['Year'])
        missing_dates = []
        for reference in (reference_df1, reference_df2):
            if reference.empty:
                continue
            for date in reference['Year']:
                if date not in seen:
                    seen.add(date)
                    missing_dates.append(date)

        if not missing_dates:
            # Nothing to add; still hand back a new frame, as before.
            return df.reset_index(drop=True)

        filler = pd.DataFrame(0, index=range(len(missing_dates)), columns=df.columns)
        filler['Year'] = missing_dates
        return pd.concat([filler, df], ignore_index=True)
    except Exception as err:
        raise CustomException(
            'Error while adding the missing dates in add_missing_dates() function', sys
        ) from err

In [None]:
def reformat_dataframe(df, name):
    """Reshape a wide table (one column per country) into long format.

    Every column except 'Year' is melted into a 'Country' column plus a value
    column called ``name``. 'Year' is then renamed to 'Date' and parsed with
    the format '%YM%m'.

    Args:
        df: Wide DataFrame with a 'Year' column; it is not modified.
        name: Label for the value column of the result.

    Returns:
        A new DataFrame with columns 'Date', 'Country' and ``name``.

    Raises:
        CustomException: If reshaping or date parsing fails. The underlying
            error is chained as ``__cause__``.
    """
    try:
        countries = [column for column in df.columns if column != 'Year']
        formatted_df = df.melt(
            id_vars='Year', value_vars=countries, var_name='Country', value_name=name
        )
        formatted_df = formatted_df.rename(columns={'Year': 'Date'})
        formatted_df['Date'] = pd.to_datetime(formatted_df['Date'], format='%YM%m')
        return formatted_df
    except Exception as err:
        # Pass the exc_info provider that CustomException requires, and chain
        # the original error so it is not lost.
        raise CustomException(
            'Error while reformatting the dataframes in reformat_dataframe() function', sys
        ) from err


In [None]:
def format_date(dataframe):
    """Rename 'Year' to 'Date' and parse that column as datetimes, in place.

    Args:
        dataframe: DataFrame to modify. If it has no 'Year' column the rename
            changes nothing and an existing 'Date' column is parsed instead.

    Returns:
        The same ``dataframe`` object, matching the other helpers in this
        notebook (previously the function returned ``None``).

    Raises:
        CustomException: If the column is missing or cannot be parsed with the
            format '%YM%m'. The underlying error is chained as ``__cause__``.
    """
    try:
        # In-place on purpose: existing callers ignore the return value.
        dataframe.rename(columns={'Year': 'Date'}, inplace=True)
        dataframe['Date'] = pd.to_datetime(dataframe['Date'], format='%YM%m')
    except Exception as err:
        raise CustomException(
            'Error while reformatting the dates in format_date() function', sys
        ) from err
    return dataframe



In [41]:
def transform(df1, df2, df3, name):
    """Run the full clean-and-reshape pipeline on ``df1``.

    Steps: correct column labels, zero-fill missing values, add the columns
    and dates found only in ``df2``/``df3``, then melt to long format with a
    parsed 'Date' column.

    Args:
        df1: Wide table to transform. Its labels, missing values and columns
            are modified in place by the first three steps.
        df2: First reference table (source of extra columns and dates).
        df3: Second reference table (source of extra columns and dates).
        name: Label for the value column of the result.

    Returns:
        The long-format DataFrame produced by ``reformat_dataframe``.

    Raises:
        CustomException: If any step fails. The underlying error is chained as
            ``__cause__``.
    """
    try:
        # These two helpers modify df1 in place, so their results are not needed.
        rename_columns(df1)
        fill_missing_values(df1)
        # Pass each stage's result to the next instead of relying on df1 having
        # been mutated as a side effect.
        with_countries = add_missing_countries(df1, df2, df3)
        with_dates = add_missing_dates(with_countries, df2, df3)
        formatted_df = reformat_dataframe(with_dates, name)
        # reformat_dataframe already renames and parses the date column; this
        # call is kept so the output is unchanged for existing callers.
        format_date(formatted_df)
        logging.info('Data is transformed')
        return formatted_df
    except Exception as err:
        logging.error('Transformation error')
        raise CustomException('Error while transforming the data', sys) from err


In [24]:
# Transform each table, using the two *other* tables as references for
# missing countries and dates.
formatted_CPI = transform(CPI_data, ER_data, Exports_data, 'CPI')
formatted_ER = transform(ER_data, CPI_data, Exports_data, 'ER')
# The Exports call previously passed Exports_data as its own reference, so
# dates present only in ER_data were never added.
formatted_Exports = transform(Exports_data, CPI_data, ER_data, 'Exports')

NameError: name 'transform' is not defined

In [None]:
# Number of distinct 'Date' values in the long-format Exports data (as a 1-tuple).
formatted_Exports.Date.unique().shape

In [None]:
# Number of distinct 'Country' values in the long-format Exports data (as a 1-tuple).
formatted_Exports.Country.unique().shape

In [None]:
# Number of distinct 'Date' values in the long-format CPI data (as a 1-tuple).
formatted_CPI.Date.unique().shape

In [None]:
# Number of distinct 'Date' values in the long-format ER data (as a 1-tuple).
formatted_ER.Date.unique().shape

In [None]:
# Number of distinct 'Country' values in the long-format ER data (as a 1-tuple).
formatted_ER.Country.unique().shape

In [7]:
# Apply rename_columns to ER_data and rebind the name to the returned frame.
# NOTE(review): an assignment displays nothing, so the Series output recorded
# for this cell looks like a leftover from an earlier version — confirm.
ER_data = rename_columns(ER_data)

0      0.000000
1      0.000000
2      0.000000
3      0.000000
4      0.000000
         ...   
355    1.681886
356    1.703457
357    1.686150
358    1.705174
359    1.722000
Name: Bosnia and Herzegovina, Length: 360, dtype: float64

In [None]:
# Number of distinct 'Country' values in the long-format CPI data (as a 1-tuple).
formatted_CPI.Country.unique().shape