# In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from numpy.random import default_rng

class GreenhouseGasProfile:
    """Load a semicolon-delimited emissions table with its metadata and plot it.

    The constructor reads two CSV files: the data table (indexed by its
    ``DutchEconomy`` column) and a metadata file from which two sub-tables are
    cut by fixed row positions (``gas_info`` and ``source_info``).  It then adds
    three derived columns to the data table: ``Source``, ``Date`` and ``HFC``.

    Attributes are documented inline in ``__init__``.
    """

    # Column labels applied to metadata rows 0-16 (after the skipped preamble).
    _GAS_COLUMNS = ['ID', 'Position', 'ParentID', 'Type', 'Key', 'Title', 'Description', 'ReleasePolicy',
                    'Datatype', 'Unit', 'Decimals', 'Default']
    # Column labels applied to metadata rows 19-77; only the first three are kept.
    _SOURCE_COLUMNS = ['DutchEconomy', 'Title', 'Description', 'NA1', 'NA2', 'NA3', 'NA4', 'NA5', 'NA6',
                       'NA7', 'NA8', 'NA9']
    # Multipliers applied to CH4_5 and N2O_4 when deriving the HFC column.
    # NOTE(review): 25 / 298 look like 100-year global-warming potentials
    # (CO2-equivalents per unit of gas) -- confirm against the dataset definition.
    _CH4_FACTOR = 25
    _N2O_FACTOR = 298

    def __init__(self, data_path, metadata_path):
        """Read both CSV files and prepare the derived columns.

        Args:
            data_path: Path of the ';'-delimited data file.  It must contain the
                columns ``ID``, ``DutchEconomy``, ``Periods``, ``TotalCO2_1``,
                ``CH4_5``, ``N2O_4`` and ``GreenhouseGasEquivalents_6``.
            metadata_path: Path of the ';'-delimited metadata file.  Its first
                nine lines are skipped and malformed lines are dropped; the
                remaining rows must be 12 columns wide.
        """
        self.data_path = data_path
        self.metadata_path = metadata_path

        # Data table: drop the running ID and index by sector key.
        self.dataset = (
            pd.read_csv(data_path, delimiter=';')
            .drop('ID', axis=1)
            .set_index('DutchEconomy')
        )
        # Raw metadata, header-less; the row positions used below assume a
        # fixed layout of the metadata export.
        self.metadata = pd.read_csv(metadata_path, delimiter=';', on_bad_lines='skip', header=None, skiprows=9)

        # Copy the slices before relabelling so the parent frame is never
        # touched through a view.
        gas_info = self.metadata[0:17].copy()
        gas_info.columns = self._GAS_COLUMNS
        # Key -> Title / Description / Unit lookup.
        self.gas_info = gas_info[['Key', 'Title', 'Description', 'Unit']].set_index('Key')

        source_info = self.metadata[19:78].copy()
        source_info.columns = self._SOURCE_COLUMNS
        # Sector key -> Title / Description lookup.
        self.source_info = source_info[['DutchEconomy', 'Title', 'Description']].set_index('DutchEconomy')

        # 'Source' holds the gas_info titles in the leading rows and 'NAs'
        # everywhere else.  The slice is clamped so the list always keeps the
        # dataset's length (an unclamped slice assignment would resize it and
        # make the column assignment fail).
        titles = self.gas_info['Title'].tolist()
        emission_sources = ['NAs'] * len(self.dataset)
        filled = min(len(titles), len(emission_sources))
        emission_sources[:filled] = titles[:filled]

        # The first four characters of 'Periods' are parsed as the year.
        years = self.dataset['Periods'].str[:4].tolist()

        self.dataset['Source'] = emission_sources
        self.dataset['Date'] = pd.to_datetime(years, format="%Y")

        # HFC = total equivalents minus the weighted CO2 / CH4 / N2O parts.
        # Plain arrays keep the arithmetic positional (the index has repeats).
        weighted_known = (
            self.dataset['TotalCO2_1'].to_numpy()
            + self.dataset['CH4_5'].to_numpy() * self._CH4_FACTOR
            + self.dataset['N2O_4'].to_numpy() * self._N2O_FACTOR
        )
        self.dataset['HFC'] = self.dataset['GreenhouseGasEquivalents_6'].to_numpy() - weighted_known

        # Keys in both tables may carry padding; strip it so lookups agree.
        self.source_info.index = self.source_info.index.str.strip()
        self.dataset.index = self.dataset.index.str.strip()

    @staticmethod
    def _as_list(values):
        """Return *values* as a new list, treating a lone string as one item."""
        return [values] if isinstance(values, str) else list(values)

    def _resolve_sectors(self, sectors):
        """Map every requested sector to ``{sector key: title}``.

        A sector that is itself a key of ``source_info`` is taken as is;
        otherwise it is used as a case-insensitive pattern against the
        ``Description`` column and every matching row is included.  Rows
        without a description never match.  Insertion order follows *sectors*
        and duplicate keys collapse into one entry.
        """
        descriptions = self.source_info['Description']
        title_by_key = {}
        for sector in self._as_list(sectors):
            if sector in self.source_info.index:
                title_by_key[sector] = self.source_info.loc[sector, 'Title']
                continue
            # na=False: a missing description would otherwise put NaN into the
            # mask and make boolean indexing raise.
            mask = descriptions.str.contains(sector, case=False, na=False)
            title_by_key.update(self.source_info.loc[mask, 'Title'].to_dict())
        return title_by_key

    def _melt_emissions(self, emissions, sectors):
        """Build the long-format frame that ``draw_plot`` draws.

        Returns a frame with the columns ``Date``, ``DutchEconomy`` (sector
        title), ``Emission Type:`` and ``Emission:``.  *emissions* is copied,
        never modified.
        """
        value_columns = self._as_list(emissions)
        title_by_key = self._resolve_sectors(sectors)

        selected = self.dataset.loc[list(title_by_key), value_columns + ['Date']].reset_index()
        melted = pd.melt(frame=selected, id_vars=['Date', 'DutchEconomy'], value_vars=value_columns,
                         var_name='Emission Type:', value_name='Emission:')
        # Show human-readable titles instead of sector keys in the legend.
        melted['DutchEconomy'] = melted['DutchEconomy'].map(title_by_key)
        return melted

    def draw_plot(self, emissions, sectors):
        """Plot the chosen emission columns over time for the chosen sectors.

        Args:
            emissions: Column name(s) of ``dataset`` to plot.  The argument is
                left unmodified.
            sectors: Sector key(s) or description pattern(s); see
                ``_resolve_sectors`` for how they are matched.

        Raises:
            KeyError: If a resolved sector key or an emission column is not
                present in ``dataset``.
        """
        melted_data = self._melt_emissions(emissions, sectors)

        _, ax = plt.subplots(figsize=(12, 8))
        sns.lineplot(data=melted_data, x='Date', y='Emission:', hue='DutchEconomy', style='Emission Type:',
                     dashes=False, markers=True, ax=ax)
        ax.set_title('Greenhouse Gas Emissions')
        ax.set_ylabel('Emission')
        ax.set_xlabel('Date')
        plt.show()
