In [6]:
import pandas as pd

# Define the relative file path
file_path = "final/filtered_weather_data_2012_2022_RIO.csv"

# Load the CSV file
data = pd.read_csv(file_path)

# Remove the .0 from the 'week' column
data['week'] = data['week'].apply(lambda x: str(x).replace('.0', '') if '.0' in str(x) else str(x))

# Select only the required columns
selected_columns = data[['ESTACAO', 'week', 'precipitation_avg', 'precipitation_max']]

# Sort the dataframe by 'week' column
sorted_data = selected_columns.sort_values(by='week')

# Display the first few rows of the sorted data
print(sorted_data.head())

# Save the sorted data back to a new CSV file
sorted_file_path = "final/sorted_precipitation_data.csv"
sorted_data.to_csv(sorted_file_path, index=False)

print(f"Filtered and sorted data saved to {sorted_file_path}")


     ESTACAO    week  precipitation_avg  precipitation_max
0       A601  201201          14.171429              66.20
1151    A603  201201           3.771429              12.84
7231    A621  201201           2.714286              11.20
7805    A624  201201           5.771429              17.20
1722    A604  201201           7.514286              18.40
Filtered and sorted data saved to final/sorted_precipitation_data.csv


In [9]:
import pandas as pd

# Define file paths
weather_file_path = "final/sorted_precipitation_data.csv"
stations_file_path = "final/stations_RIO.csv"
output_file_path = "final/sorted_precipitation_data_with_stations.csv"

# Load the CSV files
weather_data = pd.read_csv(weather_file_path)
stations_data = pd.read_csv(stations_file_path)

# Merge the data based on the station ID
merged_data = pd.merge(
    weather_data,
    stations_data[['id_station', 'lat', 'lon', 'lvl']],
    left_on='ESTACAO',
    right_on='id_station',
    how='left'
)

# Drop the now-redundant 'id_station' column
merged_data = merged_data.drop(columns=['id_station'])

merged_data['week'] = merged_data['week'].apply(lambda x: str(x).replace('.0', '') if '.0' in str(x) else str(x))

# Save the merged data to a new CSV file
merged_data.to_csv(output_file_path, index=False)

print(f"Mergsd data saved to {output_file_path}")


Mergsd data saved to final/sorted_precipitation_data_with_stations.csv


In [5]:
import pandas as pd
import numpy as np
from pykrige.ok import OrdinaryKriging

# Load the sorted precipitation data
data = pd.read_csv("../kriging/data/sorted_precipitation_data.csv")

# Function for kriging to predict precipitation for given city coordinates and altitude
def predict_precipitation(city_lat, city_lon, city_lvl):
    weeks = sorted(data['week'].unique())
    predictions = []

    for week in weeks:
        # Filter data for the specific week
        week_data = data[data['week'] == week]

        # Extract known points and corresponding values
        known_points = week_data[['lat', 'lon', 'lvl']].values
        values_avg = week_data['precipitation_avg'].values
        values_max = week_data['precipitation_max'].values

        # Create kriging models for average and max precipitation
        kriging_avg = OrdinaryKriging(
            week_data['lat'], week_data['lon'], week_data['lvl'], values_avg,
            variogram_model='linear', verbose=False, enable_plotting=False
        )
        kriging_max = OrdinaryKriging(
            week_data['lat'], week_data['lon'], week_data['lvl'], values_max,
            variogram_model='linear', verbose=False, enable_plotting=False
        )

        # Predict precipitation for the city
        pred_avg, _ = kriging_avg.execute('points', city_lat, city_lon, city_lvl)
        pred_max, _ = kriging_max.execute('points', city_lat, city_lon, city_lvl)

        # Store the predictions for the week
        predictions.append({
            'week': week,
            'predicted_precipitation_avg': pred_avg[0],
            'predicted_precipitation_max': pred_max[0]
        })

    return pd.DataFrame(predictions)

# Example usage:
city_lat = 34.05  # Replace with your city's latitude
city_lon = -118.25  # Replace with your city's longitude
city_lvl = 89  # Replace with your city's altitude

predicted_data = predict_precipitation(city_lat, city_lon, city_lvl)
print(predicted_data)


TypeError: OrdinaryKriging.__init__() got multiple values for argument 'variogram_model'

In [18]:
def remove_nan_precipitation(filepath):
    data = pd.read_csv(filepath)
    cleaned_data = data.dropna(subset=['precipitation_avg', 'precipitation_max'])
    cleaned_data.to_csv(filepath, index=False)
    
remove_nan_precipitation("../kriging/data/sorted_precipitation_data.csv")

In [13]:
from pykrige.uk import UniversalKriging
import pandas as pd

# Load the sorted precipitation data
data = pd.read_csv("../kriging/data/sorted_precipitation_data.csv")

# Function to predict precipitation using Universal Kriging
def predict_precipitation(city_lat, city_lon, city_lvl):
    weeks = sorted(data['week'].unique())
    predictions = []

    for week in weeks:
        # Filter data for the specific week
        week_data = data[data['week'] == week]

        # Universal Kriging for precipitation_avg
        uk_avg = UniversalKriging(
            week_data['lat'], week_data['lon'], week_data['precipitation_avg'],
            variogram_model="linear", drift_terms=["external_Z"],
            external_drift=week_data["lvl"].values,
            external_drift_x=week_data['lat'].values,  # Adding latitude as external drift coordinates
            external_drift_y=week_data['lon'].values   # Adding longitude as external drift coordinates
        )
        pred_avg, _ = uk_avg.execute("points", [city_lat], [city_lon], [city_lvl])

        # Universal Kriging for precipitation_max
        uk_max = UniversalKriging(
            week_data['lat'], week_data['lon'], week_data['precipitation_max'],
            variogram_model="linear", drift_terms=["external_Z"],
            external_drift=week_data["lvl"].values,
            external_drift_x=week_data['lat'].values,  # Adding latitude as external drift coordinates
            external_drift_y=week_data['lon'].values   # Adding longitude as external drift coordinates
        )
        pred_max, _ = uk_max.execute("points", [city_lat], [city_lon], [city_lvl])

        # Safely access the predictions and append them
        predictions.append({
            'week': week,
            'predicted_precipitation_avg': pred_avg[0][0],  # First value of the prediction
            'predicted_precipitation_max': pred_max[0][0]   # First value of the prediction
        })

    return pd.DataFrame(predictions)

# Example usage with provided coordinates
city_lat = -22  # Replace with your city's latitude
city_lon = -6   # Replace with your city's longitude
city_lvl = 43   # Replace with your city's altitude

predicted_data = predict_precipitation(city_lat, city_lon, city_lvl)
print(predicted_data)


ValueError: Must specify coordinates of external Z drift terms.

Using regressing


In [16]:
data = pd.read_csv("final/sorted_precipitation_data_with_stations.csv")
cities_data = pd.read_csv("final/filtered_RJ_lat_long_data.csv")
data['week'] = data['week'].astype(str)
weeks = sorted(data['week'].unique())
# show head
print(data.head())
print(cities_data.head())
print(weeks)

  ESTACAO      week  precipitation_avg  precipitation_max        lat  \
0    A601  201201.0          14.171429              66.20 -22.800000   
1    A603  201201.0           3.771429              12.84 -22.589722   
2    A621  201201.0           2.714286              11.20 -22.860833   
3    A624  201201.0           5.771429              17.20 -22.333056   
4    A604  201201.0           7.514286              18.40 -21.566667   

         lon     lvl  
0 -43.683333    33.0  
1 -43.282222    33.0  
2 -43.411111    45.0  
3 -42.677222  1046.0  
4 -41.950000    35.0  
                 CITY STATE       LONG        LAT     ALT
0      Angra Dos Reis    RJ -44.319627 -23.009116    7.64
1             Aperibé    RJ -42.104917 -21.619422   65.93
2            Araruama    RJ -42.341096 -22.877438   12.10
3               Areal    RJ -43.101070 -22.236912  450.69
4  Armação Dos Búzios    RJ -41.887749 -22.757764   10.97
['201201.0', '201202.0', '201203.0', '201204.0', '201205.0', '201206.0', '201207.

In [18]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from pykrige.ok import OrdinaryKriging

# Load the sorted precipitation data
data = pd.read_csv("final/sorted_precipitation_data_with_stations.csv")
cities_data = pd.read_csv("final/filtered_RJ_lat_long_data.csv")

data['week'] = data['week'].astype(str)
data['week'] = data['week'].apply(lambda x: x.replace('.0', '') if '.0' in x else x)
data['week'] = data['week'].astype(str)

# Function for kriging precipitation with altitude adjustment
def predict_precipitation_regression(city_lat, city_lon, city_lvl):
    weeks = sorted(data['week'].unique())
    
    print(weeks)
    predictions = []

    for week in weeks:
        # Filter data for the specific week
        week_data = data[data['week'] == week]

        # Extract features for regression
        X = week_data[['lvl']].values  # Altitude
        y_avg = week_data['precipitation_avg'].values
        y_max = week_data['precipitation_max'].values

        # Perform linear regression on altitude
        reg_avg = LinearRegression().fit(X, y_avg)
        reg_max = LinearRegression().fit(X, y_max)

        # Calculate residuals
        residuals_avg = y_avg - reg_avg.predict(X)
        residuals_max = y_max - reg_max.predict(X)

        # Perform kriging on residuals
        kriging_avg = OrdinaryKriging(
            week_data['lat'], week_data['lon'], residuals_avg,
            variogram_model='linear', verbose=False, enable_plotting=False
        )
        kriging_max = OrdinaryKriging(
            week_data['lat'], week_data['lon'], residuals_max,
            variogram_model='linear', verbose=False, enable_plotting=False
        )

        # Predict residuals for the city
        residual_avg, _ = kriging_avg.execute('points', float(city_lat), float(city_lon))
        residual_max, _ = kriging_max.execute('points', float(city_lat), float(city_lon))

        # Add the altitude effect back to the prediction
        pred_avg = reg_avg.predict([[city_lvl]])[0] + residual_avg[0]
        pred_max = reg_max.predict([[city_lvl]])[0] + residual_max[0]

        # Store the predictions for the week
        predictions.append({
            'week': week,
            'predicted_precipitation_avg': pred_avg,
            'predicted_precipitation_max': pred_max
        })

    return predictions

results = []
for _, city in cities_data.iterrows():
    city_name = city['CITY']
    city_lat = city['LAT']
    city_lon = city['LONG']
    city_lvl = city['ALT']

    # Predict precipitation for the current city
    city_predictions = predict_precipitation_regression(city_lat, city_lon, city_lvl)

    # Add city name to each prediction
    for prediction in city_predictions:
        prediction['CITY'] = city_name
        results.append(prediction)

# Convert results to a DataFrame and save as a CSV file
output_df = pd.DataFrame(results)
output_df.to_csv("final/predicted_precipitation_kriging_regression.csv", index=False)

print("Predictions saved to '../kriging/final/predicted_precipitation_by_week.csv'")


['201201', '201202', '201203', '201204', '201205', '201206', '201207', '201208', '201209', '201210', '201211', '201212', '201213', '201214', '201215', '201216', '201217', '201218', '201219', '201220', '201221', '201222', '201223', '201224', '201225', '201226', '201227', '201228', '201229', '201230', '201231', '201232', '201233', '201234', '201235', '201236', '201237', '201238', '201239', '201240', '201241', '201242', '201243', '201244', '201245', '201246', '201247', '201248', '201249', '201250', '201251', '201252', '201301', '201302', '201303', '201304', '201305', '201306', '201307', '201308', '201309', '201310', '201311', '201312', '201313', '201314', '201315', '201316', '201317', '201318', '201319', '201320', '201321', '201322', '201323', '201324', '201325', '201326', '201327', '201328', '201329', '201330', '201331', '201332', '201333', '201334', '201335', '201336', '201337', '201338', '201339', '201340', '201341', '201342', '201343', '201344', '201345', '201346', '201347', '201348',

ValueError: zero-size array to reduction operation maximum which has no identity

Normal way without altitude

In [12]:
import pandas as pd
import numpy as np
from pykrige.ok import OrdinaryKriging

# Load the sorted data
file_path = "final/sorted_precipitation_data_with_stations.csv"
data = pd.read_csv(file_path)

# Function to perform kriging for a given city location and predict precipitation
def predict_precipitation_ordinary(city_lat, city_lon):
    # Get unique weeks from the data
    unique_weeks = data['week'].unique()

    # Store predictions for each week
    predictions = []

    # Iterate through each week
    for week in unique_weeks:
        # Filter data for the current week
        week_data = data[data['week'] == week]

        # Prepare input arrays for kriging
        lats = week_data['lat'].values
        lons = week_data['lon'].values

        avg_precip = week_data['precipitation_avg'].values
        max_precip = week_data['precipitation_max'].values

        # Ordinary Kriging for precipitation_avg
        ok_avg = OrdinaryKriging(
            lats, lons, avg_precip,
            variogram_model='linear',
            verbose=False,
            enable_plotting=False
        )
        predicted_avg, _ = ok_avg.execute('points', float(city_lat), float(city_lon))

        # Ordinary Kriging for precipitation_max
        ok_max = OrdinaryKriging(
            lats, lons, max_precip,
            variogram_model='linear',
            verbose=False,
            enable_plotting=False
        )
        predicted_max, _ = ok_max.execute('points', float(city_lat), float(city_lon))

        # Append predictions for this week
        predictions.append({
            'week': week,
            'precipitation_avg': predicted_avg[0],
            'precipitation_max': predicted_max[0]
        })

    return predictions  # Return a list of dictionaries, not a string

# Example cities data (load your actual cities CSV file)
cities_data = pd.read_csv("../kriging/data/filtered_RJ_lat_long_data.csv")

results = []
for _, city in cities_data.iterrows():
    city_name = city['CITY']
    city_lat = city['LAT']
    city_lon = city['LONG']
    city_lvl = city['ALT']

    # Predict precipitation for the current city
    city_predictions = predict_precipitation_ordinary(city_lat, city_lon)

    for prediction in city_predictions:
        prediction['CITY'] = city_name  # Add the city name to the prediction
        results.append(prediction)

# Convert results to a DataFrame and save as a CSV file
output_df = pd.DataFrame(results)
output_df.to_csv("final/predicted_precipitation_by_week_ordinary.csv", index=False)

print("Predictions saved to '../kriging/data/predicted_precipitation_by_week_ordinary.csv'")


ValueError: zero-size array to reduction operation maximum which has no identity

In [37]:
# Read the CSV file
df = pd.read_csv('../kriging/data/predicted_precipitation_by_week_ordinary.csv')

# Replace negative precipitation values with 0
df['precipitation_avg'] = df['precipitation_avg'].apply(lambda x: max(x, 0))
df['precipitation_max'] = df['precipitation_max'].apply(lambda x: max(x, 0))

# Round the precipitation values to 4 decimal places
df['precipitation_avg'] = df['precipitation_avg'].round(4)
df['precipitation_max'] = df['precipitation_max'].round(4)

# Save the updated DataFrame back to the same CSV file
df.to_csv('../kriging/data/predicted_precipitation_by_week_ordinary.csv', index=False)

print("Negative precipitation values have been replaced with 0 and rounded to 4 decimal places.")

Negative precipitation values have been replaced with 0 and rounded to 4 decimal places.


In [41]:
# Read the CSV file
df = pd.read_csv('../kriging/data/predicted_precipitation_by_week_regression.csv')

# Replace negative precipitation values with 0
df['predicted_precipitation_avg'] = df['predicted_precipitation_avg'].apply(lambda x: max(x, 0))
df['predicted_precipitation_max'] = df['predicted_precipitation_max'].apply(lambda x: max(x, 0))

# Round the precipitation values to 4 decimal places
df['predicted_precipitation_avg'] = df['predicted_precipitation_avg'].round(4)
df['predicted_precipitation_max'] = df['predicted_precipitation_max'].round(4)

# Save the updated DataFrame back to the same CSV file
df.to_csv('../kriging/data/predicted_precipitation_by_week_regression.csv', index=False)

print("Negative precipitation values have been replaced with 0 and rounded to 4 decimal places.")


Negative precipitation values have been replaced with 0 and rounded to 4 decimal places.
