<a href="https://colab.research.google.com/github/nawafalrumaihi/portfolio/blob/main/Bahrain_OceanographicData.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Average Sea Site Salinity (2007 - 2017) in the Kingdom of Bahrain

In [None]:
# Create an interactive scatter plot of the average salinity at each location
import pandas as pd
import plotly.express as px

# Input CSV files: the SST readings and the geolocation table.
# Replace both paths with the locations of your own copies of the data.
sst_file_path = "marine 2007 - 2016 may 2017.csv"
geo_location_file_path = "Bahrain Climate Data - Geo Locations - Sheet2.csv"

# Read both files as ISO-8859-1 to avoid encoding errors while parsing.
sst_data_df = pd.read_csv(sst_file_path, encoding="ISO-8859-1")
geo_location_df = pd.read_csv(geo_location_file_path, encoding="ISO-8859-1")

# Standardize the location names in both datasets for accurate merging:
# surrounding whitespace is trimmed and the text is lower-cased so that the
# join keys compare equal regardless of formatting differences.
sst_data_df['locations'] = sst_data_df['locations'].str.strip().str.lower()
geo_location_df['Location'] = geo_location_df['Location'].str.strip().str.lower()

# Merge the datasets on the location field.
# The join type is spelled out: with an inner join, readings whose location
# has no matching row in the geolocation table are dropped at this step.
merged_data_df = pd.merge(
    sst_data_df,
    geo_location_df,
    how='inner',
    left_on='locations',
    right_on='Location',
)

# Convert the 'Date' column to datetime; values that cannot be parsed become NaT
merged_data_df['Date'] = pd.to_datetime(merged_data_df['Date'], errors='coerce')

# Remove commas and convert the 'Salinity %0' column to numeric.
# The column is cast to str first because:
#   * if pandas already parsed it as a numeric dtype, the .str accessor would
#     raise AttributeError;
#   * if it is an object column mixing strings and numbers, .str.replace
#     would turn every non-string entry into NaN and the dropna below would
#     then silently discard valid readings.
# Missing values become text such as 'nan', which to_numeric maps back to NaN.
# NOTE(review): commas are stripped as if they were thousands separators; if
# the source file uses a decimal comma (e.g. "45,5") this inflates the value
# - confirm against the raw data.
merged_data_df['Salinity %0'] = pd.to_numeric(
    merged_data_df['Salinity %0'].astype(str).str.replace(',', '', regex=False),
    errors='coerce',
)

# Remove rows with missing (or unparseable) salinity data
cleaned_data_df = merged_data_df.dropna(subset=['Salinity %0'])

# Create a new dataframe with the average salinity for each location.
# Latitude and longitude are carried along so each location can be plotted.
avg_salinity_df = cleaned_data_df.groupby('locations').agg({
    'Salinity %0': 'mean',
    'Latitude_dd': 'first',  # Assuming each location has a unique latitude
    'Longitude_dd': 'first'  # Assuming each location has a unique longitude
}).reset_index()

# Create a non-map scatter plot for salinity: one marker per location, placed
# at its longitude (x) and latitude (y). Average salinity drives both the
# marker colour and the marker size, and is repeated in the hover tooltip.
fig_avg_salinity = px.scatter(
    avg_salinity_df,
    x="Longitude_dd",
    y="Latitude_dd",
    color="Salinity %0",
    size="Salinity %0",
    hover_name="locations",
    hover_data=["Salinity %0"],
    color_continuous_scale=px.colors.sequential.Blues,
    size_max=15
)

fig_avg_salinity.update_layout(
    # Title spelling corrected ("Longitude"); centred via title_x below
    title_text="Average Salinity in Bahrain by Longitude & Latitude (2007 - 2017)",
    xaxis_title="Longitude",
    yaxis_title="Latitude",
    # Source credit placed below the chart and x-axis: with "paper"
    # references x/y are fractions of the plotting area, so a negative y
    # puts the text underneath it.
    annotations=[
        dict(
            x=0.5,
            y=-0.20,
            showarrow=False,
            text="Source: Kingdom of Bahrain - Supreme Council for Environment",
            xref="paper",
            yref="paper"
        )
    ],
    title_x=0.5
)

# Render the interactive figure
fig_avg_salinity.show()