# Analysis of Environmental Health - Country-Wise

## Data from: https://search.earthdata.nasa.gov/search

### The PMD index provides information about the air quality of a specific country by measuring the presence of fine particulate matter (PM2.5), which consists of dust particles smaller than 2.5 microns in diameter.

### The categorize_pmd() function assigns air quality categories based on predefined PMD thresholds:

#### PMD < 15: Green (Good) – Indicates good air quality with minimal health risks. 
#### 15 ≤ PMD ≤ 50: Orange (Moderate) – Represents moderate air quality with potential health concerns for sensitive groups. 
#### PMD > 50: Red (Severe) – Reflects poor air quality, posing significant health risks for the general population.



In [17]:
import numpy as np
import pandas as pd
import plotly.express as px

data = pd.read_csv('PMD_ind.csv')

# One column per year, named 'PMD.ind.<year>'.
pmd_columns = [col for col in data.columns if col.startswith('PMD.ind')]

# Mask the values treated as invalid *before* aggregating. Replacing them only
# after the median is taken lets a -9999 (etc.) drag the median of a row down
# instead of being ignored. Working on a separate frame also avoids a chained
# in-place `replace` on a column selection, which is not guaranteed to write
# back to `data` under pandas Copy-on-Write.
valid_pmd = data[pmd_columns].replace(
    {0: np.nan, -8888: np.nan, -7777: np.nan, -9999: np.nan}
)

# NOTE: the column keeps its historical name 'PMD_mean' because the plotting
# code refers to it, but the statistic stored here is the per-country median
# over all years (NaN values are skipped).
data['PMD_mean'] = valid_pmd.median(axis=1)

# Countries with no valid reading in any year end up with a NaN median; drop them.
data = data.dropna(subset=['PMD_mean'])

def categorize_pmd(pmd):
    """Map a PMD value to its air-quality band.

    Values below 15 are 'Good', values from 15 up to and including 50 are
    'Moderate', and everything else is 'Severe'.
    """
    if pmd < 15:
        return 'Good'
    if pmd <= 50:
        return 'Moderate'
    return 'Severe'
    
# """A categorize_pmd() function assigns categories (Good, Moderate, Severe) based on PMD thresholds.
# PMD < 15: Green (Good), 15 <= PMD <= 50: Orange (Moderate), PMD > 50: Red (Severe)."""
    
# Label every country with its air-quality band (thresholds live in
# categorize_pmd) and order the bands from best to worst.
level_order = ['Good', 'Moderate', 'Severe']
data['PMD_Level'] = data['PMD_mean'].apply(categorize_pmd)
data['PMD_Level'] = pd.Categorical(
    data['PMD_Level'],
    categories=level_order,
    ordered=True,
)

# One colour per band.
color_map = {
    'Good': 'green',
    'Moderate': 'orange',
    'Severe': 'red',
}

# Scatter plot of the per-country PMD value. The 'PMD_mean' column holds the
# median over all years, so the title and labels say "median".
fig = px.scatter(
    data,
    x='country',
    y='PMD_mean',
    size='PMD_mean',
    color='PMD_Level',
    color_discrete_map=color_map,
    # Keep the legend in Good -> Moderate -> Severe order regardless of which
    # band happens to appear first in the data.
    category_orders={'PMD_Level': level_order},
    title='Scatter Plot of median PMD (a measure of Air Quality) for all Countries',
    labels={'PMD_mean': 'Median PMD', 'country': 'Country'},
    template='plotly',
)

fig.update_layout(
    xaxis_title='Country',
    yaxis_title='Median PMD',
    xaxis=dict(tickangle=90),  # rotate x-axis labels so country names stay readable
    height=600,
    width=1000,
)

fig.show()



# Plot Environmental Data by Country
### You can now analyze any of the environmental symbols listed below by country. The thresholds used for categorization (e.g., Good, Moderate, Severe) are the same as those defined for PMD above, as they generally apply to most of these indices.

### However, some symbols, like REC (Recycling Rate), have different interpretations. For example:

### A higher REC index indicates better recycling performance by a country.
#### Note: To fully understand the plots, please refer to the PDF provided by Earthdata for a detailed explanation of each symbol.

### Symbols:    
GHN: Greenhouse Gas Emissions (% contribution).
CDA: Carbon Dioxide (CO2) Growth Rate.
CHA: Methane (CH4) Growth Rate.
LCB: CO2 from Land Cover (Land Use and Land-Use Change).
GIB: GHG Intensity Trend (Greenhouse Gas per economic output).
FGA: F-Gas Growth Rate (Fluorinated gases).
BCA: Black Carbon Growth Rate.
GHP: GHG Emissions per Capita.
NDA: Nitrous Oxide (N2O) Growth Rate.
Environmental Health
HLT: Overall Health Impact (% contribution).
AIR: Air Quality.
H2O: Sanitation & Drinking Water.
HMT: Heavy Metals (e.g., lead exposure).
UWD: Unsafe Drinking Water.
USD: Unsafe Sanitation.
PBD: Lead Exposure.
MSW: Controlled Solid Waste.
REC: Recycling Rates.
OCP: Ocean Plastic Pollution.
TCL: Tree Cover Loss.
GRL: Grassland Loss.
WTL: Wetland Loss.
SDA: SO2 (Sulfur Dioxide) Growth Rate.
NXA: NOX (Nitrogen Oxides) Growth Rate.
WWT: Wastewater Treatment.
Air Pollution
PMD: PM2.5 Exposure (fine particulate matter pollution).
HAD: Household Solid Fuels.
OZD: Ozone Exposure.
NOE: Nitrogen Oxide (NOx) Exposure.
SOE: Sulfur Dioxide (SO2) Exposure.
COE: Carbon Monoxide (CO) Exposure.
VOE: Volatile Organic Compounds (VOC) Exposure.
Waste Management
WMG: Waste Management in General (% contribution).
Biodiversity & Habitat
BDH: Biodiversity & Habitat.
TBN: Terrestrial Biome Protection (National Level).
TBG: Terrestrial Biome Protection (Global Level).
MPA: Marine Protected Areas.
PAR: Protected Areas Representation Index.
SHI: Species Habitat Index.
SPI: Species Protection Index.
BHV: Biodiversity Habitat Index.
Ecosystem Vitality
ECO: Ecosystem Vitality.
ECS: Ecosystem Services.
ACD: Acid Rain.
WRS: Water Resources.
Fisheries
FSH: Fisheries.
FSS: Fish Stock Status.
RMS: Marine Trophic Index.
FTD: Fish Caught by Trawling.
Agriculture
AGR: Agriculture.
SNM: Sustainable Nitrogen Management Index.
SPU: Sustainable Pesticide Use.



In [3]:
import pandas as pd
import numpy as np
import plotly.express as px

class SymbolDataProcessor:
    """Load one indicator CSV and plot each country's median value.

    The file ``<symbol>_ind.csv`` is read from the working directory. It is
    expected to have a ``country`` column and one column per year named
    ``<symbol>.ind.<year>``.
    """

    # Values treated as missing/invalid; they are turned into NaN on load.
    INVALID_VALUES = (0, -8888, -7777, -9999)

    def __init__(self, symbol):
        """
        Store the symbol and derive the CSV file name from it.

        Nothing is read from disk until `load_data()` is called.
        """
        self.symbol = symbol
        self.file_name = f"{symbol}_ind.csv"
        self.data = None
        self.years = []

    def load_data(self):
        """
        Load the CSV file, clean it, and compute the per-country median.

        After this call ``self.data`` holds only the rows without any missing
        or invalid value, plus two extra columns: ``<symbol>_median`` and
        ``<symbol>_Level`` (an ordered categorical: Good < Moderate < Severe).
        ``self.years`` holds the year suffix (as a string) of every indicator
        column.
        """
        # Read the CSV file
        self.data = pd.read_csv(self.file_name)

        # Indicator columns are the ones named '<symbol>.ind.<year>'.
        prefix = self.symbol + '.ind'
        columns = [col for col in self.data.columns if col.startswith(prefix)]

        # Rebuild the list rather than appending to it, so calling load_data()
        # more than once does not duplicate the years.
        self.years = [col.split('.')[-1] for col in columns]

        # Replace invalid values with NaN
        self.data.replace(
            {value: np.nan for value in self.INVALID_VALUES}, inplace=True
        )

        # Drop every row that has a NaN in *any* column, i.e. only countries
        # with a complete series are kept.
        self.data.dropna(inplace=True)

        median_col = self.symbol + '_median'
        level_col = self.symbol + '_Level'

        # Median of the indicator over all years, per country
        self.data[median_col] = self.data[columns].median(axis=1)

        # Guard for the case where no indicator column exists (median is NaN)
        self.data = self.data.dropna(subset=[median_col])

        # Categorize the median into Good / Moderate / Severe
        self.data[level_col] = self.data[median_col].apply(self.categorize)

        # Store the level as an ordered categorical (best to worst)
        self.data[level_col] = pd.Categorical(
            self.data[level_col],
            categories=['Good', 'Moderate', 'Severe'],
            ordered=True
        )

    @staticmethod
    def categorize(value):
        """
        Categorize a value into 'Good' (< 15), 'Moderate' (15 to 50
        inclusive) or 'Severe' (anything else).
        """
        if value < 15:
            return 'Good'
        elif 15 <= value <= 50:
            return 'Moderate'
        else:
            return 'Severe'

    def plot(self):
        """
        Show a Plotly scatter plot of the median value for all countries.

        Raises:
            ValueError: if `load_data()` has not been called yet.
        """
        if self.data is None:
            raise ValueError("Data not loaded. Call `load_data()` first.")

        median_col = self.symbol + '_median'
        level_col = self.symbol + '_Level'

        # Define a color map
        color_map = {
            'Good': 'green',
            'Moderate': 'orange',
            'Severe': 'red'
        }

        # Plot the data
        fig = px.scatter(
            self.data,
            x='country',
            y=median_col,
            size=median_col,
            color=level_col,
            color_discrete_map=color_map,
            # Keep the legend in best-to-worst order
            category_orders={level_col: ['Good', 'Moderate', 'Severe']},
            title=f'Scatter Plot of Median {self.symbol} for All Countries',
            # The key must be the actual column name for the label to apply
            labels={median_col: f'Median {self.symbol}', 'country': 'Country'},
            template='plotly'
        )

        fig.update_layout(
            xaxis_title='Country',
            yaxis_title=f'Median {self.symbol}',
            xaxis=dict(tickangle=90),  # Rotate x-axis labels
            height=600,
            width=1000
        )

        fig.show()

# Example: scatter plot of the per-country median for one symbol
if __name__ == "__main__":
    symbol = 'PMD'

    # Build the processor, read the CSV for the symbol, then draw the plot.
    processor = SymbolDataProcessor(symbol=symbol)
    processor.load_data()
    processor.plot()

 

## This code snippet allows you to visualize the time series variation of any environmental symbol for specific countries. This can help track changes over time and provide deeper insights.

In [5]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

class TimeSeriesPlotter:
    """Plot the year-by-year values of one indicator for a single country.

    The CSV is expected to have a ``country`` column and one column per year
    named ``<symbol>.ind.<year>``.
    """

    # Values treated as missing/invalid; they are turned into NaN on load.
    INVALID_VALUES = (0, -8888, -7777, -9999)

    def __init__(self, symbol, file_path=None):
        """
        Store the symbol and the CSV location.

        If `file_path` is None the file name defaults to ``<symbol>_ind.csv``.
        Nothing is read from disk until `load_data()` is called.
        """
        self.symbol = symbol
        self.file_name = f"{symbol}_ind.csv" if file_path is None else file_path
        self.data = None
        self.years = []

    def _indicator_columns(self):
        """Return the names of the '<symbol>.ind.<year>' columns, in file order."""
        prefix = self.symbol + '.ind'
        return [col for col in self.data.columns if col.startswith(prefix)]

    def load_data(self):
        """
        Read the CSV, extract the years, and mark invalid values as NaN.
        """
        # Read the CSV file
        self.data = pd.read_csv(self.file_name)

        # Rebuild the list rather than appending to it, so calling load_data()
        # more than once does not duplicate the years. Years are integers so
        # the x-axis is numeric.
        self.years = [
            int(col.split('.')[-1]) for col in self._indicator_columns()
        ]

        # Replace invalid values with NaN
        self.data.replace(
            {value: np.nan for value in self.INVALID_VALUES}, inplace=True
        )

    def get_country_data(self, country_name):
        """
        Return the indicator values of one country as a Series.

        The Series has one entry per indicator column (indexed by column
        name), in the same order as `self.years`.

        Raises:
            ValueError: if `load_data()` has not been called yet.
            IndexError: if no row matches `country_name`.
        """
        if self.data is None:
            raise ValueError("Data not loaded. Call `load_data()` first.")

        # Select the indicator columns by name instead of by position, so the
        # values always line up with self.years however many metadata columns
        # precede them in the file.
        country_rows = self.data.loc[
            self.data['country'] == country_name, self._indicator_columns()
        ]

        if len(country_rows) == 0:
            raise IndexError(
                f"Country {country_name!r} not found in {self.file_name}."
            )

        # Use the first matching row
        return country_rows.iloc[0]

    def plot_time_series(self, country_name):
        """
        Show a Plotly line chart of the indicator over time for one country.

        The values used for the plot are also stored in `self.values`.

        Raises:
            ValueError: if no years are available (`load_data()` not called).
            IndexError: if no row matches `country_name`.
        """
        if not self.years:
            raise ValueError("Years not extracted. Call `load_data()` first.")

        # Get the indicator values for the specified country
        self.values = self.get_country_data(country_name)

        # Create a time series plot with Plotly
        fig = go.Figure()

        fig.add_trace(
            go.Scatter(
                x=self.years,
                y=self.values,
                mode='lines+markers',
                # Name the trace after the symbol actually being plotted
                name=f'{country_name} {self.symbol}'
            )
        )

        # Add titles and labels
        fig.update_layout(
            title=f"Time Series of {self.symbol} for {country_name}",
            xaxis_title="Year",
            yaxis_title=f"{self.symbol}",
            template="plotly_white",
            height=500,
            width=800
        )

        fig.show()

# Example usage: choose the environmental symbol and the country to plot
if __name__ == "__main__":
    symbol = 'OZD'

    # Build the plotter and read the CSV for the symbol.
    plotter = TimeSeriesPlotter(symbol=symbol)
    plotter.load_data()

    # Draw the time series for France
    plotter.plot_time_series(country_name='France')
