In [16]:
import pandas as pd

# Define the relative file path
file_path = "../kriging/data/filtered_precipitation_data_2012_2022_by_station.csv"

# Load the CSV file
data = pd.read_csv(file_path)

# Sort the dataframe by 'week' column
sorted_data = data.sort_values(by='week')

# Display the first few rows of the sorted data
print(sorted_data.head())

# Save the sorted data back to a new CSV file
sorted_file_path = "../kriging/data/sorted_precipitation_data.csv"
sorted_data.to_csv(sorted_file_path, index=False)


     ESTACAO        lat        lon     lvl    week  precipitation_avg  \
0       A601 -22.800000 -43.683333    33.0  201201          14.171429   
2853    A607 -21.716667 -41.350000    25.0  201201           6.371429   
2281    A606 -22.983333 -42.016667     4.0  201201           2.114286   
1160    A603 -22.589722 -43.282222    33.0  201201                NaN   
4548    A610 -22.481667 -43.291389  1777.0  201201           9.685714   

      precipitation_max  
0                  66.2  
2853               25.0  
2281                5.4  
1160                NaN  
4548               19.0  


In [5]:
import pandas as pd
import numpy as np
from pykrige.ok import OrdinaryKriging

# Load the sorted precipitation data
data = pd.read_csv("../kriging/data/sorted_precipitation_data.csv")

# Function for kriging to predict precipitation for given city coordinates and altitude
def predict_precipitation(city_lat, city_lon, city_lvl):
    weeks = sorted(data['week'].unique())
    predictions = []

    for week in weeks:
        # Filter data for the specific week
        week_data = data[data['week'] == week]

        # Extract known points and corresponding values
        known_points = week_data[['lat', 'lon', 'lvl']].values
        values_avg = week_data['precipitation_avg'].values
        values_max = week_data['precipitation_max'].values

        # Create kriging models for average and max precipitation
        kriging_avg = OrdinaryKriging(
            week_data['lat'], week_data['lon'], week_data['lvl'], values_avg,
            variogram_model='linear', verbose=False, enable_plotting=False
        )
        kriging_max = OrdinaryKriging(
            week_data['lat'], week_data['lon'], week_data['lvl'], values_max,
            variogram_model='linear', verbose=False, enable_plotting=False
        )

        # Predict precipitation for the city
        pred_avg, _ = kriging_avg.execute('points', city_lat, city_lon, city_lvl)
        pred_max, _ = kriging_max.execute('points', city_lat, city_lon, city_lvl)

        # Store the predictions for the week
        predictions.append({
            'week': week,
            'predicted_precipitation_avg': pred_avg[0],
            'predicted_precipitation_max': pred_max[0]
        })

    return pd.DataFrame(predictions)

# Example usage:
city_lat = 34.05  # Replace with your city's latitude
city_lon = -118.25  # Replace with your city's longitude
city_lvl = 89  # Replace with your city's altitude

predicted_data = predict_precipitation(city_lat, city_lon, city_lvl)
print(predicted_data)


TypeError: OrdinaryKriging.__init__() got multiple values for argument 'variogram_model'

In [18]:
def remove_nan_precipitation(filepath):
    data = pd.read_csv(filepath)
    cleaned_data = data.dropna(subset=['precipitation_avg', 'precipitation_max'])
    cleaned_data.to_csv(filepath, index=False)
    
remove_nan_precipitation("../kriging/data/sorted_precipitation_data.csv")

In [13]:
from pykrige.uk import UniversalKriging
import pandas as pd

# Load the sorted precipitation data
data = pd.read_csv("../kriging/data/sorted_precipitation_data.csv")

# Function to predict precipitation using Universal Kriging
def predict_precipitation(city_lat, city_lon, city_lvl):
    weeks = sorted(data['week'].unique())
    predictions = []

    for week in weeks:
        # Filter data for the specific week
        week_data = data[data['week'] == week]

        # Universal Kriging for precipitation_avg
        uk_avg = UniversalKriging(
            week_data['lat'], week_data['lon'], week_data['precipitation_avg'],
            variogram_model="linear", drift_terms=["external_Z"],
            external_drift=week_data["lvl"].values,
            external_drift_x=week_data['lat'].values,  # Adding latitude as external drift coordinates
            external_drift_y=week_data['lon'].values   # Adding longitude as external drift coordinates
        )
        pred_avg, _ = uk_avg.execute("points", [city_lat], [city_lon], [city_lvl])

        # Universal Kriging for precipitation_max
        uk_max = UniversalKriging(
            week_data['lat'], week_data['lon'], week_data['precipitation_max'],
            variogram_model="linear", drift_terms=["external_Z"],
            external_drift=week_data["lvl"].values,
            external_drift_x=week_data['lat'].values,  # Adding latitude as external drift coordinates
            external_drift_y=week_data['lon'].values   # Adding longitude as external drift coordinates
        )
        pred_max, _ = uk_max.execute("points", [city_lat], [city_lon], [city_lvl])

        # Safely access the predictions and append them
        predictions.append({
            'week': week,
            'predicted_precipitation_avg': pred_avg[0][0],  # First value of the prediction
            'predicted_precipitation_max': pred_max[0][0]   # First value of the prediction
        })

    return pd.DataFrame(predictions)

# Example usage with provided coordinates
city_lat = -22  # Replace with your city's latitude
city_lon = -6   # Replace with your city's longitude
city_lvl = 43   # Replace with your city's altitude

predicted_data = predict_precipitation(city_lat, city_lon, city_lvl)
print(predicted_data)


ValueError: Must specify coordinates of external Z drift terms.

Using regressing


In [25]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from pykrige.ok import OrdinaryKriging

# Load the sorted precipitation data
data = pd.read_csv("../kriging/data/sorted_precipitation_data.csv")
cities_data = pd.read_csv("../kriging/data/filtered_RJ_lat_long_data.csv")

# Function for kriging precipitation with altitude adjustment
def predict_precipitation_regression(city_lat, city_lon, city_lvl):
    weeks = sorted(data['week'].unique())
    predictions = []

    for week in weeks:
        # Filter data for the specific week
        week_data = data[data['week'] == week]

        # Extract features for regression
        X = week_data[['lvl']].values  # Altitude
        y_avg = week_data['precipitation_avg'].values
        y_max = week_data['precipitation_max'].values

        # Perform linear regression on altitude
        reg_avg = LinearRegression().fit(X, y_avg)
        reg_max = LinearRegression().fit(X, y_max)

        # Calculate residuals
        residuals_avg = y_avg - reg_avg.predict(X)
        residuals_max = y_max - reg_max.predict(X)

        # Perform kriging on residuals
        kriging_avg = OrdinaryKriging(
            week_data['lat'], week_data['lon'], residuals_avg,
            variogram_model='linear', verbose=False, enable_plotting=False
        )
        kriging_max = OrdinaryKriging(
            week_data['lat'], week_data['lon'], residuals_max,
            variogram_model='linear', verbose=False, enable_plotting=False
        )

        # Predict residuals for the city
        residual_avg, _ = kriging_avg.execute('points', float(city_lat), float(city_lon))
        residual_max, _ = kriging_max.execute('points', float(city_lat), float(city_lon))

        # Add the altitude effect back to the prediction
        pred_avg = reg_avg.predict([[city_lvl]])[0] + residual_avg[0]
        pred_max = reg_max.predict([[city_lvl]])[0] + residual_max[0]

        # Store the predictions for the week
        predictions.append({
            'week': week,
            'predicted_precipitation_avg': pred_avg,
            'predicted_precipitation_max': pred_max
        })

    return predictions

results = []
for _, city in cities_data.iterrows():
    city_name = city['CITY']
    city_lat = city['LAT']
    city_lon = city['LONG']
    city_lvl = city['ALT']

    # Predict precipitation for the current city
    city_predictions = predict_precipitation_regression(city_lat, city_lon, city_lvl)

    # Add city name to each prediction
    for prediction in city_predictions:
        prediction['CITY'] = city_name
        results.append(prediction)

# Convert results to a DataFrame and save as a CSV file
output_df = pd.DataFrame(results)
output_df.to_csv("../kriging/data/predicted_precipitation_by_week.csv", index=False)

print("Predictions saved to '../kriging/data/predicted_precipitation_by_week.csv'")


Predictions saved to '../kriging/data/predicted_precipitation_by_week.csv'


Normal way without altitude

In [32]:
import pandas as pd
import numpy as np
from pykrige.ok import OrdinaryKriging

# Load the sorted data
file_path = "../kriging/data/sorted_precipitation_data.csv"
data = pd.read_csv(file_path)

# Function to perform kriging for a given city location and predict precipitation
def predict_precipitation_ordinary(city_lat, city_lon):
    # Get unique weeks from the data
    unique_weeks = data['week'].unique()

    # Store predictions for each week
    predictions = []

    # Iterate through each week
    for week in unique_weeks:
        # Filter data for the current week
        week_data = data[data['week'] == week]

        # Prepare input arrays for kriging
        lats = week_data['lat'].values
        lons = week_data['lon'].values

        avg_precip = week_data['precipitation_avg'].values
        max_precip = week_data['precipitation_max'].values

        # Ordinary Kriging for precipitation_avg
        ok_avg = OrdinaryKriging(
            lats, lons, avg_precip,
            variogram_model='linear',
            verbose=False,
            enable_plotting=False
        )
        predicted_avg, _ = ok_avg.execute('points', float(city_lat), float(city_lon))

        # Ordinary Kriging for precipitation_max
        ok_max = OrdinaryKriging(
            lats, lons, max_precip,
            variogram_model='linear',
            verbose=False,
            enable_plotting=False
        )
        predicted_max, _ = ok_max.execute('points', float(city_lat), float(city_lon))

        # Append predictions for this week
        predictions.append({
            'week': week,
            'precipitation_avg': predicted_avg[0],
            'precipitation_max': predicted_max[0]
        })

    return predictions  # Return a list of dictionaries, not a string

# Example cities data (load your actual cities CSV file)
cities_data = pd.read_csv("../kriging/data/filtered_RJ_lat_long_data.csv")

results = []
for _, city in cities_data.iterrows():
    city_name = city['CITY']
    city_lat = city['LAT']
    city_lon = city['LONG']
    city_lvl = city['ALT']

    # Predict precipitation for the current city
    city_predictions = predict_precipitation_ordinary(city_lat, city_lon)

    for prediction in city_predictions:
        prediction['CITY'] = city_name  # Add the city name to the prediction
        results.append(prediction)

# Convert results to a DataFrame and save as a CSV file
output_df = pd.DataFrame(results)
output_df.to_csv("../kriging/data/predicted_precipitation_by_week_ordinary.csv", index=False)

print("Predictions saved to '../kriging/data/predicted_precipitation_by_week_ordinary.csv'")


Predictions saved to '../kriging/data/predicted_precipitation_by_week_ordinary.csv'


In [33]:
# Read the CSV file
df = pd.read_csv('../kriging/data/predicted_precipitation_by_week_ordinary.csv')

# Replace negative precipitation values with 0
df['precipitation_avg'] = df['precipitation_avg'].apply(lambda x: max(x, 0))
df['precipitation_max'] = df['precipitation_max'].apply(lambda x: max(x, 0))

# Round the precipitation values to 4 decimal places
df['precipitation_avg'] = df['precipitation_avg'].round(4)
df['precipitation_max'] = df['precipitation_max'].round(4)

# Save the updated DataFrame back to the same CSV file
df.to_csv('../kriging/data/predicted_precipitation_by_week_ordinary.csv', index=False)

print("Negative precipitation values have been replaced with 0 and rounded to 4 decimal places.")

Negative precipitation values have been replaced with 0.
