## FISH CATCHES IN IRELAND AND IN COMPARISON TO OTHER E.U. COUNTRIES


In [1]:
# importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statistics as stats

In [2]:
# Load the capture-fisheries table from the CSV in the working directory.
df = pd.read_csv(filepath_or_buffer='fish_capture.csv')

In [3]:
# Preview the first five rows of the loaded table.
df.head(n=5)

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Aruba,ABW,Capture fisheries production (metric tons),ER.FSH.CAPT.MT,200.0,200.0,300.0,300.0,300.0,300.0,...,138.0,145.0,152.0,150.0,150.0,149.0,149.0,163.0,155.0,
1,Africa Eastern and Southern,AFE,Capture fisheries production (metric tons),ER.FSH.CAPT.MT,1485740.0,1632169.0,1735664.0,1848486.0,2164132.0,2174850.0,...,3823494.99,3701198.31,3927435.12,4033890.26,4188878.44,4210226.37,4149011.16,4220081.72,4192288.05,
2,Afghanistan,AFG,Capture fisheries production (metric tons),ER.FSH.CAPT.MT,200.0,300.0,300.0,300.0,300.0,300.0,...,1300.0,1450.0,1600.0,1750.0,1900.0,2000.0,2050.0,2100.0,2000.0,
3,Africa Western and Central,AFW,Capture fisheries production (metric tons),ER.FSH.CAPT.MT,488505.0,513837.0,560769.0,582503.0,646740.0,680876.0,...,3112164.29,3172804.04,3221896.54,3156916.84,3473352.32,4126363.57,4230596.86,4015509.77,3855653.1,
4,Angola,AGO,Capture fisheries production (metric tons),ER.FSH.CAPT.MT,257300.0,246800.0,274800.0,245300.0,362000.0,263300.0,...,374000.0,407000.0,442084.0,495232.0,486490.0,531575.0,443066.0,400762.0,377345.0,


In [4]:
# Reshape wide -> long: every column that is not an identifier becomes a row,
# with the old column label stored in "Date" and its value in the catch column.
id_columns = ["Country Name", "Country Code", 'Indicator Name', 'Indicator Code']
df = pd.melt(
    df,
    id_vars=id_columns,
    var_name="Date",
    value_name="captured fishes (metric tons)",
)


In [5]:
# Remove the two indicator columns from the frame.
indicator_columns = ['Indicator Name', 'Indicator Code']
df = df.drop(indicator_columns, axis='columns')

In [6]:
# Collect every row that repeats an earlier row, then report the shape
# (rows, columns) of that subset.
is_repeat = df.duplicated()
duplicate_rows_df = df.loc[is_repeat]
print("number of duplicate rows:", duplicate_rows_df.shape)

number of duplicate rows: (0, 4)


In [7]:
# Count the missing entries in each column.
missing_per_column = df.isna().sum()
print(missing_per_column)

Country Name                        0
Country Code                        0
Date                                0
captured fishes (metric tons)    1851
dtype: int64


In [8]:
# Discard every row that has a missing value in any column.
df = df.dropna(axis='index', how='any')

In [9]:
# Preview the first five rows after the reshaping and cleaning steps.
df.head(n=5)

Unnamed: 0,Country Name,Country Code,Date,captured fishes (metric tons)
0,Aruba,ABW,1960,200.0
1,Africa Eastern and Southern,AFE,1960,1485740.0
2,Afghanistan,AFG,1960,200.0
3,Africa Western and Central,AFW,1960,488505.0
4,Angola,AGO,1960,257300.0





## DATA FOR THE E.U

In [10]:
# Create a dataframe with only the rows for the specified EU countries and
# sort it in ascending order by country name and date

# Names must match the 'Country Name' column exactly; a name that does not
# match is simply absent from the result, so unmatched names are reported below.
# NOTE(review): some World Bank extracts spell these 'Czechia' / 'Slovak Republic'
# -- confirm against the CSV.
eu_countries = [
    'Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Republic',
    'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Hungary',
    'Ireland', 'Italy', 'Latvia', 'Lithuania', 'Luxembourg', 'Malta',
    'Netherlands', 'Poland', 'Portugal', 'Romania', 'Slovakia', 'Slovenia',
    'Spain', 'Sweden',
]

# Index the frame once with the boolean mask. The previous
# df.apply(lambda col: col[mask]) selected the same rows but re-applied the
# mask separately to every column.
is_eu_row = df['Country Name'].isin(eu_countries)
df_eu = df[is_eu_row].sort_values(by=['Country Name', 'Date'], ascending=True)

# Surface any requested country that contributed no rows instead of letting it
# silently drop out of every later chart and average.
unmatched_countries = sorted(set(eu_countries) - set(df_eu['Country Name']))
if unmatched_countries:
    print("EU countries with no rows in the data:", unmatched_countries)

# Print the first 5 rows of the resulting dataframe
df_eu.head()


Unnamed: 0,Country Name,Country Code,Date,captured fishes (metric tons)
14,Austria,AUT,1960,3550.0
280,Austria,AUT,1961,3540.0
546,Austria,AUT,1962,3830.0
812,Austria,AUT,1963,4000.0
1078,Austria,AUT,1964,4220.0


In [11]:
#descriptive statistics of the catch column, truncated to whole numbers
# Cast the whole result at once instead of calling int() on each one-element
# row: int(Series) is deprecated in recent pandas. astype truncates toward
# zero just like int(); rename(None) clears the Series name so the displayed
# result matches the previous form.
(
    df_eu['captured fishes (metric tons)']
    .describe()
    .astype('int64')
    .rename(None)
)

count       1377
mean      269246
std       371072
min          263
25%         9336
50%       135467
75%       348441
max      2013518
dtype: int64

# VISUALIZATION

In [12]:
#Line graph to show the trend of captured fish in Ireland from 1960-2020
#Geo visualisation to show Ireland compared to other E.U countries
#Histogram to show the average fish catch in E.U countries from 1960-2020

In [None]:
import plotly.express as px

In [None]:
# Keep only Ireland's rows from the EU frame.
is_ireland = df_eu['Country Name'] == 'Ireland'
df_ireland = df_eu.loc[is_ireland]

# Line chart of the catch column against the Date column.
fig = px.line(
    data_frame=df_ireland,
    x='Date',
    y='captured fishes (metric tons)',
    title='Captured Fishes in Ireland',
)

# Apply the same visible grey grid to both axes.
grid_style = {'showgrid': True, 'gridcolor': '#bdbdbd'}
fig.update_layout(xaxis=grid_style, yaxis=grid_style)

fig.show()



In [None]:
 

# Three-stop colour scale: red, yellow, green.
custom_color_scale = ['#FF0000', '#FFFF00', '#00FF00']

# Animated choropleth: one frame per Date value, countries located by their
# 'Country Code' and coloured by the catch column.
choropleth_options = dict(
    locations='Country Code',
    color='captured fishes (metric tons)',
    hover_name='Country Name',
    animation_frame='Date',
    title='Captured Fishes by Country',
    height=900,
    color_continuous_scale=custom_color_scale,
)
fig = px.choropleth(df_eu, **choropleth_options)

# Overrides the title passed above and restricts the map scope to Europe.
fig.update_layout(
    title=dict(text="Fish Catches In E.U Countries"),
    geo=dict(scope='europe'),
)
fig.show()



## AVERAGE FISH CATCH PER E.U. COUNTRY FROM 1960-2020

In [None]:
# One group per country name.
df_grouped = df_eu.groupby(['Country Name'])

# Mean of the catch column within each group, rounded to a whole number, with
# 'Country Name' moved from the index back to an ordinary column.
df_averages = (
    df_grouped['captured fishes (metric tons)']
    .mean()
    .round()
    .reset_index()
)

# Show the per-country averages.
print(df_averages)


In [None]:
# df_averages already holds one pre-computed mean per country, so draw the
# values as they are. px.histogram would aggregate them again (a sum by
# default) and title the y-axis "sum of ...", which mislabels averages.
fig = px.bar(
    df_averages,
    x='Country Name',
    y='captured fishes (metric tons)',
    title='Average Captured Fishes (Metric Tons) in all E.U countries 1960-2020',
)
fig.show()


# DASHBOARD

In [None]:
#USING THE GEODATA MAP TO CREATE A WEB DASHBOARD

In [None]:
# Requires the dash package; if it is missing, install it with: %pip install dash

In [None]:
import dash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output

In [None]:
# Create the Dash application object.
app = dash.Dash()

# Distinct country names present in df_eu; each becomes one dropdown entry.
countries = df_eu['Country Name'].unique()
dropdown_options = [{'label': name, 'value': name} for name in countries]

# Multi-select country picker, pre-populated with two countries.
country_picker = dcc.Dropdown(
    id='countries-dropdown',
    options=dropdown_options,
    multi=True,
    value=['France', 'Germany'],
)

# Graph component for the choropleth map; no figure is assigned here.
map_graph = dcc.Graph(id='choropleth-map')

# Page layout: the picker above the map.
app.layout = html.Div([country_picker, map_graph])

@app.callback(
    Output('choropleth-map', 'figure'),
    [Input('countries-dropdown', 'value')]
)
def update_map(selected_countries):
    """Rebuild the choropleth for the countries chosen in the dropdown.

    Args:
        selected_countries: Current value of the 'countries-dropdown'
            component: a list of country names, or None / an empty list
            when the selection has been cleared.

    Returns:
        The animated choropleth figure for the selected countries, or for
        every country in df_eu when nothing is selected.
    """
    # A cleared selection must not reach isin(): None raises TypeError there,
    # and an empty list would leave no rows to draw. Treat "nothing selected"
    # as "no filter".
    if not selected_countries:
        df_filtered = df_eu
    else:
        df_filtered = df_eu[df_eu['Country Name'].isin(selected_countries)]

    # Create the figure with the filtered data
    fig = px.choropleth(df_filtered,
                        locations='Country Code',
                        color='captured fishes (metric tons)',
                        hover_name='Country Name',
                        animation_frame='Date',
                        title='Captured Fishes by Country',
                        height=900,
                        color_continuous_scale=custom_color_scale)

    # Overrides the title passed above and restricts the map scope to Europe.
    fig.update_layout(title_text= "Fish Catches In E.U Countries",
                      geo_scope='europe')
    return fig

if __name__ == '__main__':
    # Newer Dash releases start the server with app.run(); run_server is the
    # older spelling. Look up `run` first so run_server is only touched on
    # installs that lack it.
    start_server = getattr(app, 'run', None) or app.run_server
    start_server()


# SENTIMENT ANALYSIS