In [1]:
import plotly.express as px
import json
import pandas as pd

In [2]:
file_path = "/home/users/dsdb/GIATEX/datasets/GIATEX_dataset_merged_test.csv"

# Candidate encodings, tried in order. 'latin-1' maps every possible byte to a
# character, so it can never raise UnicodeDecodeError and therefore goes last.
# ('ISO-8859-1' is another name for the same codec, so listing it after
# 'latin-1' could never be reached.)
encodings_to_try = ['utf-8', 'latin-1']
last_error = None
for encoding in encodings_to_try:
    try:
        df_giatex_original = pd.read_csv(file_path, sep=';', encoding=encoding)
    except UnicodeDecodeError as err:
        last_error = err
        print(f"Failed to read with encoding: {encoding}")
    else:
        print(f"Successfully read the file with encoding: {encoding}")
        break  # The first encoding that decodes cleanly wins
else:
    # No candidate worked: stop here with a clear message instead of failing
    # later with a NameError (or silently reusing a stale frame left in the
    # kernel) when df_giatex_original is accessed.
    raise ValueError(
        f"Could not decode {file_path} with any of: {encodings_to_try}"
    ) from last_error

# Work on a copy so the frame as loaded from disk stays untouched.
df_giatex_copy = df_giatex_original.copy()

# Define the list of empresas
# empresas = [
#     'GT01 - NEIPER', 'GT04 - ADALBERTO', 'GT07 - SOMELOS', 'GT10 - FITECOM',
#     'GT02 - TINTEX', 'GT05 - JFA', 'GT08 - TINAMAR', 'GT11 - ACATEL',
#     'GT03 - DSF', 'GT06 - RIFER', 'GT09 - RIOPELE', 'GT12 - ABM'
# ]

Failed to read with encoding: utf-8
Successfully read the file with encoding: latin-1


In [3]:
# Inspect the working copy; the notebook renders a truncated preview
# (first and last rows, elided columns) rather than the whole frame.
df_giatex_copy

Unnamed: 0,Empresa,snapshot,mes,ano,bacia_hidrografica,latitude,longitude,recolha_agua,indice_seca,precipitation,...,Magnésio,Zinco,Cálcio,Cloretos,Sulfatos,Sulfitos,Sulfuretos,Hidrocarbonetos totais,Óleos e gorduras,Detergentes aniónicos
0,GT01 - NEIPER,,,,,,,,,,...,,,,,,,,,,
1,GT01 - NEIPER,,,,,,,,,,...,,,,,,,,,,
2,GT01 - NEIPER,,,,,,,,,,...,,,,,,,,,,
3,GT01 - NEIPER,,,,,,,,,,...,,,,,,,,,,
4,GT01 - NEIPER,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
911,GT02 - TINTEX,16/11/2023,11.0,2023.0,Minho,41.935523,-8.750388,Furo,Chuva extrema,chuva,...,3.32,0.05,7.3,1570.00,1590.0,2.0,0.05,0.29,0.54,0.13
912,GT02 - TINTEX,16/11/2023,11.0,2023.0,Minho,41.935523,-8.750388,Furo,Chuva extrema,chuva,...,3.62,0.05,8.5,10500.00,608.0,4.0,0.05,14.40,197.00,9.07
913,GT02 - TINTEX,16/11/2023,11.0,2023.0,Minho,41.935523,-8.750388,Furo,Chuva extrema,chuva,...,3.36,0.06,6.8,78.50,21.2,3.0,0.05,0.22,0.99,0.02
914,GT12 - ABM,16/11/2023,11.0,2023.0,Cavado,41.524477,-8.632028,Rio,Chuva extrema,chuva,...,,,,10.05,10.0,,,,,


In [4]:
"""
Cell generated by Data Wrangler.
"""
def clean_data(df_giatex_copy):
    """Return a reduced version of the frame for downstream analysis.

    Two steps are applied, in order:
      1. the 22 columns named in ``unwanted_columns`` are removed;
      2. rows with a missing 'bacia_hidrografica' value are discarded.

    The frame passed in is left unmodified; a new DataFrame is returned.
    Raises KeyError if any of the listed columns is absent.
    """
    unwanted_columns = [
        'Empresa', 'snapshot', 'mes', 'ano', 'recolha_agua', 'indice_seca',
        'precipitation', 'soil_moisture_225', 'processo_representativo',
        'pre_tratamento', 'origem', 'Line', 'ID Processo', 'Estrutura',
        'Fibra', 'Corante', 'Cor', 'Máquina', 'R:B', 'Banho simplif.',
        'Banho', 'banho_representativo',
    ]
    trimmed = df_giatex_copy.drop(columns=unwanted_columns)
    # Keep only the rows that carry a 'bacia_hidrografica' value.
    return trimmed.dropna(subset=['bacia_hidrografica'])

# Clean a copy of the working frame; df_giatex_copy itself is passed by copy
# and is therefore not affected by this step.
df_giatex_copy_clean = clean_data(df_giatex_copy.copy())

In [14]:
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd

# Assumes df_giatex_copy_clean has 'latitude', 'longitude' and 'Condutividade' columns.

# Mean conductivity for each unique latitude/longitude pair.
# A pair with no conductivity reading at all averages to NaN, and plotly
# rejects NaN marker sizes, so such pairs are dropped before plotting.
average_condutividade = (
    df_giatex_copy_clean
    .groupby(['latitude', 'longitude'])['Condutividade']
    .mean()
    .dropna()
    .reset_index()
)

# One bubble per coordinate pair; both colour and bubble size encode the mean.
fig = px.scatter_mapbox(
    average_condutividade,
    lat='latitude',
    lon='longitude',
    color='Condutividade',
    color_continuous_scale='Blackbody',
    size='Condutividade',
)

# Title, base map and initial view (zoom level and map centre) are all set in
# one place so there is a single source of truth for the viewport.
fig.update_layout(
    title='Map with Average Conductivity Values',
    mapbox_style="carto-positron",
    mapbox_zoom=6.2,
    mapbox_center={"lat": 41.30425259851746, "lon": -7.679317663875982},
)

# sizemode='area' makes the bubble *area* (not its diameter) scale with the value.
fig.update_traces(marker=dict(sizemode='area'))

# Show the plot
fig.show()