In [0]:
from pyspark.sql.functions import lit

#### Loading CSV with Weather data

In [0]:
# DBFS location of the raw weather CSV export.
weather_csv_path = "dbfs:/FileStore/shared_uploads/075bei015.kshitiz@pcampus.edu.np/Weather_data.csv"

# The first row holds the column names; let Spark infer each column's type.
df = spark.read.csv(
    path=weather_csv_path,
    header=True,
    inferSchema=True,
)

In [0]:
# Render the freshly loaded DataFrame so the columns and values can be inspected.
display(df)

id,city_name,lon,lat,date,time,temp,temp_min,temp_max,pressure,humidity,dt,timezone,visibility,wind_deg,wind_speed,wind_gust,clouds_all
1282616.0,Wali?,83.76667,27.98333,2023-06-08T12:17:32.000+0000,2023-06-09T12:17:32.000+0000,312.06,312.06,312.06,1001,14,1686205953,20700,10000,146.0,2.65,3.37,8.0
1282621.0,Upardang Gadhi,84.566666,27.766666,2023-06-08T12:17:32.000+0000,2023-06-09T12:17:32.000+0000,310.8,310.8,310.8,1000,14,1686205955,20700,10000,210.0,4.1,4.28,9.0
1282635.0,Tulsipur,82.297256,28.130989,2023-06-08T12:17:32.000+0000,2023-06-09T12:17:32.000+0000,310.78,310.78,310.78,1003,17,1686205411,20700,10000,227.0,6.24,5.87,5.0
1282665.0,Tikoli,84.5,27.633333,2023-06-08T12:17:32.000+0000,2023-06-09T12:17:32.000+0000,315.97,315.97,315.97,1001,13,1686205413,20700,10000,206.0,3.95,3.55,2.0
1282666.0,?ikapur,81.133331,28.5,2023-06-08T12:17:32.000+0000,2023-06-09T12:17:32.000+0000,314.91,314.91,314.91,1001,12,1686205415,20700,10000,252.0,3.95,2.73,0.0
1282616.0,Wali?,83.76667,27.98333,2023-06-08T13:18:39.000+0000,2023-06-09T13:18:39.000+0000,312.44,312.44,312.44,1000,14,1686209620,20700,10000,155.0,2.75,3.24,12.0
1282621.0,Upardang Gadhi,84.566666,27.766666,2023-06-08T13:18:39.000+0000,2023-06-09T13:18:39.000+0000,311.1,311.1,311.1,999,13,1686209622,20700,10000,208.0,3.79,4.64,12.0
1282635.0,Tulsipur,82.297256,28.130989,2023-06-08T13:18:39.000+0000,2023-06-09T13:18:39.000+0000,311.66,311.66,311.66,1001,15,1686209624,20700,10000,231.0,5.56,6.25,35.0
1282665.0,Tikoli,84.5,27.633333,2023-06-08T13:18:39.000+0000,2023-06-09T13:18:39.000+0000,316.94,316.94,316.94,998,12,1686209625,20700,10000,202.0,3.89,4.71,2.0
1282666.0,?ikapur,81.133331,28.5,2023-06-08T13:18:39.000+0000,2023-06-09T13:18:39.000+0000,316.76,316.76,316.76,999,10,1686209627,20700,10000,219.0,3.05,2.59,0.0


###### Adding a column that flags whether a row is forecasted data or not

In [0]:
# Tag every row with a "Forcasted" column holding the literal string "False"
# (a string, not a boolean); forecast rows built later carry the string 'True'.
df = df.withColumn("Forcasted", lit("False"))

In [0]:
# Expose the DataFrame to Spark SQL under the temporary view name "temp_table".
df.createOrReplaceTempView("temp_table")

In [0]:
# Materialise the temp view as the Delta table `weather_data`.
# CREATE OR REPLACE (rather than a plain CREATE) keeps this cell re-runnable:
# a plain CREATE TABLE raises once the table already exists, which breaks
# "Restart & Run All" on every execution after the first.
spark.sql("CREATE OR REPLACE TABLE weather_data USING delta AS SELECT * FROM temp_table")

Out[96]: DataFrame[num_affected_rows: bigint, num_inserted_rows: bigint]

#### Forecasting the data

In [0]:
def forcast_next_hour(window_size: int = 4):
    """Build a one-hour-ahead forecast row for every city in ``weather_data``.

    For each ``city_name`` the forecast row is derived from that city's most
    recent reading (ordered by ``time``):

    * ``temp`` is the mean of the city's last ``window_size`` readings,
      rounded to 2 decimals (a simple moving-average forecast);
    * ``time`` is the latest reading's ``time`` plus one hour;
    * every other column (including ``date``, ``temp_min`` and ``temp_max``)
      is carried over unchanged from the latest reading;
    * ``Forcasted`` is the string ``'True'``.

    The previous query filtered down to the single latest timestamp *before*
    averaging, so the average always collapsed to the latest temperature and
    the window had no effect; it also dropped any city whose latest reading
    was older than the global latest timestamp. Both are fixed here.

    Args:
        window_size: Number of most recent readings per city to average.
            Must be a positive integer. Defaults to 4.

    Returns:
        A Spark DataFrame with one row per city and the same column order as
        ``weather_data`` (so it can be unioned positionally with it).

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    # Coerce to int before interpolating so only a number can reach the SQL text.
    window_size = int(window_size)
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    df_forcasted = spark.sql(f"""
        WITH ranked AS (
          -- row_num = 1 is the most recent reading of each city
          SELECT *,
                 ROW_NUMBER() OVER (PARTITION BY city_name ORDER BY time DESC) AS row_num
          FROM weather_data
        ),
        recent AS (
          -- keep the last {window_size} readings per city and average their temperature
          SELECT *,
                 AVG(temp) OVER (PARTITION BY city_name) AS avg_temp
          FROM ranked
          WHERE row_num <= {window_size}
        )
        SELECT
          id,
          city_name,
          lon,
          lat,
          date,
          DATEADD(hour, 1, time) AS time,
          ROUND(avg_temp, 2) AS temp,
          temp_min,
          temp_max,
          pressure,
          humidity,
          dt,
          timezone,
          visibility,
          wind_deg,
          wind_speed,
          wind_gust,
          clouds_all,
          'True' AS Forcasted
        FROM recent
        WHERE row_num = 1
    """)
    return df_forcasted

###### Forecasting data for the next four hours

In [0]:
# forcast_next_hour() always reads the unchanged `weather_data` table, so
# calling it four times would append four identical copies of the same
# one-hour-ahead rows. Compute the forecast once, then shift its `time`
# by 0..3 extra hours to obtain distinct rows for hours +1 .. +4.
column_names = df.columns

# Give the forecast the same column names as `df` (the union is positional).
next_hour_forecast = forcast_next_hour().toDF(*column_names)

for extra_hours in range(4):
    shifted_forecast = next_hour_forecast.selectExpr(
        *[
            f"`{name}` + INTERVAL {extra_hours} HOURS AS `{name}`"
            if name == "time"
            else f"`{name}`"
            for name in column_names
        ]
    )
    df = df.union(shifted_forecast)

In [0]:
# Register the combined DataFrame (original rows plus the unioned forecast rows)
# as the temporary view "weather_forcast_table" so it can be queried with SQL.
df.createOrReplaceTempView("weather_forcast_table")

In [0]:
%sql

-- Show every row of the weather_forcast_table temporary view.
select * from weather_forcast_table

id,city_name,lon,lat,date,time,temp,temp_min,temp_max,pressure,humidity,dt,timezone,visibility,wind_deg,wind_speed,wind_gust,clouds_all,Forcasted
1282616.0,Wali?,83.76667,27.98333,2023-06-08 12:17:32,2023-06-09T12:17:32.000+0000,312.06,312.06,312.06,1001,14,1686205953,20700,10000,146.0,2.65,3.37,8.0,False
1282621.0,Upardang Gadhi,84.566666,27.766666,2023-06-08 12:17:32,2023-06-09T12:17:32.000+0000,310.8,310.8,310.8,1000,14,1686205955,20700,10000,210.0,4.1,4.28,9.0,False
1282635.0,Tulsipur,82.297256,28.130989,2023-06-08 12:17:32,2023-06-09T12:17:32.000+0000,310.78,310.78,310.78,1003,17,1686205411,20700,10000,227.0,6.24,5.87,5.0,False
1282665.0,Tikoli,84.5,27.633333,2023-06-08 12:17:32,2023-06-09T12:17:32.000+0000,315.97,315.97,315.97,1001,13,1686205413,20700,10000,206.0,3.95,3.55,2.0,False
1282666.0,?ikapur,81.133331,28.5,2023-06-08 12:17:32,2023-06-09T12:17:32.000+0000,314.91,314.91,314.91,1001,12,1686205415,20700,10000,252.0,3.95,2.73,0.0,False
1282616.0,Wali?,83.76667,27.98333,2023-06-08 13:18:39,2023-06-09T13:18:39.000+0000,312.44,312.44,312.44,1000,14,1686209620,20700,10000,155.0,2.75,3.24,12.0,False
1282621.0,Upardang Gadhi,84.566666,27.766666,2023-06-08 13:18:39,2023-06-09T13:18:39.000+0000,311.1,311.1,311.1,999,13,1686209622,20700,10000,208.0,3.79,4.64,12.0,False
1282635.0,Tulsipur,82.297256,28.130989,2023-06-08 13:18:39,2023-06-09T13:18:39.000+0000,311.66,311.66,311.66,1001,15,1686209624,20700,10000,231.0,5.56,6.25,35.0,False
1282665.0,Tikoli,84.5,27.633333,2023-06-08 13:18:39,2023-06-09T13:18:39.000+0000,316.94,316.94,316.94,998,12,1686209625,20700,10000,202.0,3.89,4.71,2.0,False
1282666.0,?ikapur,81.133331,28.5,2023-06-08 13:18:39,2023-06-09T13:18:39.000+0000,316.76,316.76,316.76,999,10,1686209627,20700,10000,219.0,3.05,2.59,0.0,False
