In [2]:
import pandas as pd
import kagglehub


# Last ned datasettene
pathGermany = kagglehub.dataset_download("l3llff/wind-power")

# Les inn CSV-filene som DataFrames
df_germany = pd.read_csv(f"{pathGermany}/data.csv")

In [3]:
# Sjekk de første radene for å forstå datastrukturen
print("Data fra Tyskland:")
print(df_germany.head())

Data fra Tyskland:
                    dt      MW
0  2011-01-01 00:00:00  3416.0
1  2011-01-01 00:15:00  4755.0
2  2011-01-01 00:30:00  4939.0
3  2011-01-01 00:45:00  4939.0
4  2011-01-01 01:00:00  4998.0


In [4]:
# Konverter 'dt'-kolonnen til datetime-format
df_germany['dt'] = pd.to_datetime(df_germany['dt'], errors='coerce')

#Dato og MW fra en spesifikk selvvalgt rad 
print(df_germany.iloc[299959][['dt', 'MW']])

dt    2019-07-22 15:15:00
MW                1885.91
Name: 299959, dtype: object


In [5]:
# Filtrer data for spesifikk dato og tid
target_time = '2015-03-20 05:00:00'
mw_value = df_germany.loc[df_germany['dt'] == target_time, 'MW'].values
# Sjekk om verdien ble funnet
if len(mw_value) > 0:
    print(f"MW-verdien kl. 05:00 den 20.03.2015 er: {mw_value[0]} MW")
else:
    print("Ingen data funnet for denne datoen og klokkeslettet.")

MW-verdien kl. 05:00 den 20.03.2015 er: 132.0 MW


In [7]:
#Eksempel om jeg vil skrive ut hvor mange ganger MW verdien går under 100:
# Filtrer ut verdier som er under 100
mw_under_100_count = (df_germany['MW'] < 100).sum()
# Vis resultatet
print(f"Antall ganger MW er under 100: {mw_under_100_count}")

Antall ganger MW er under 100: 8815


In [6]:

# Filtrer de siste 5000 radene og velg rader der MW < 100
filtered_data = df_germany.iloc[-5000:][df_germany['MW'] < 100][['dt', 'MW']]
# Vis resultatet som en pent formatert tabell
print(filtered_data.to_string(index=False))  # For en ryddig terminalvisning


                 dt    MW
2021-12-21 12:45:00 96.13
2021-12-21 13:00:00 81.28
2021-12-21 13:15:00 75.77
2021-12-21 13:30:00 64.05
2021-12-21 13:45:00 60.27
2021-12-21 14:00:00 51.33
2021-12-21 14:15:00 50.40
2021-12-21 14:30:00 46.74
2021-12-21 14:45:00 44.55
2021-12-21 15:00:00 37.79
2021-12-21 15:15:00 42.52
2021-12-21 15:30:00 40.44
2021-12-21 15:45:00 44.59
2021-12-21 16:00:00 44.34
2021-12-21 16:15:00 45.56
2021-12-21 16:30:00 47.81
2021-12-21 16:45:00 54.15
2021-12-21 17:00:00 56.11
2021-12-21 17:15:00 59.57
2021-12-21 17:30:00 59.44
2021-12-21 17:45:00 57.28
2021-12-21 18:00:00 66.89
2021-12-21 18:15:00 64.95
2021-12-21 18:30:00 75.46
2021-12-21 18:45:00 86.89
2021-12-21 19:00:00 92.34
2021-12-21 19:15:00 90.09
2021-12-21 19:30:00 97.38
2021-12-21 20:00:00 92.05
2021-12-21 20:15:00 89.79
2021-12-21 20:30:00 98.10


  filtered_data = df_germany.iloc[-5000:][df_germany['MW'] < 100][['dt', 'MW']]


In [10]:
#List comprehensions
mw_values = [(row['dt'], row['MW']) for _, row in df_germany.iloc[-5000:].iterrows() if row['MW'] < 100]
# Vis resultatet
print(mw_values)

[(Timestamp('2021-12-21 12:45:00'), 96.13), (Timestamp('2021-12-21 13:00:00'), 81.28), (Timestamp('2021-12-21 13:15:00'), 75.77), (Timestamp('2021-12-21 13:30:00'), 64.05), (Timestamp('2021-12-21 13:45:00'), 60.27), (Timestamp('2021-12-21 14:00:00'), 51.33), (Timestamp('2021-12-21 14:15:00'), 50.4), (Timestamp('2021-12-21 14:30:00'), 46.74), (Timestamp('2021-12-21 14:45:00'), 44.55), (Timestamp('2021-12-21 15:00:00'), 37.79), (Timestamp('2021-12-21 15:15:00'), 42.52), (Timestamp('2021-12-21 15:30:00'), 40.44), (Timestamp('2021-12-21 15:45:00'), 44.59), (Timestamp('2021-12-21 16:00:00'), 44.34), (Timestamp('2021-12-21 16:15:00'), 45.56), (Timestamp('2021-12-21 16:30:00'), 47.81), (Timestamp('2021-12-21 16:45:00'), 54.15), (Timestamp('2021-12-21 17:00:00'), 56.11), (Timestamp('2021-12-21 17:15:00'), 59.57), (Timestamp('2021-12-21 17:30:00'), 59.44), (Timestamp('2021-12-21 17:45:00'), 57.28), (Timestamp('2021-12-21 18:00:00'), 66.89), (Timestamp('2021-12-21 18:15:00'), 64.95), (Timestamp(

In [22]:
#Iritator for å finne første forekomst der MW < 50
chunk_iter = pd.read_csv(f"{pathGermany}/data.csv", chunksize=1000)
# Finn første rad der MW er under 50
for chunk in chunk_iter:
    chunk['dt'] = pd.to_datetime(chunk['dt'], errors='coerce')  # Konverter 'dt' til datetime
    result = chunk[chunk['MW'] < 50]
    if not result.empty:
        print(result[['dt', 'MW']].head(1))
        break  # Stopper når første treff er funnet

                     dt    MW
901 2011-01-10 09:15:00  44.0


In [12]:
#SQL - inspirert spørring for MW < 100 på spesifikke datoer 
# Filtrer data med Pandas SQL (query)
filtered_data = df_germany.query("MW < 100 and dt >= '2015-03-01' and dt <= '2015-03-31'")
# Vis resultatet
print(filtered_data[['dt', 'MW']])

                        dt    MW
146935 2015-03-11 13:45:00  94.0
146936 2015-03-11 14:00:00  88.0
146937 2015-03-11 14:15:00  83.0
146938 2015-03-11 14:30:00  93.0
146939 2015-03-11 14:45:00  64.0
146940 2015-03-11 15:00:00  18.0
146941 2015-03-11 15:15:00  55.0
146942 2015-03-11 15:30:00  18.0
146943 2015-03-11 15:45:00  81.0
146944 2015-03-11 16:00:00  91.0
146945 2015-03-11 16:15:00  90.0
146946 2015-03-11 16:30:00  89.0
146947 2015-03-11 16:45:00  59.0
147749 2015-03-20 01:15:00  91.0
147750 2015-03-20 01:30:00  87.0
147751 2015-03-20 01:45:00  80.0
147752 2015-03-20 02:00:00  78.0
147753 2015-03-20 02:15:00  78.0
147754 2015-03-20 02:30:00  86.0
147755 2015-03-20 02:45:00  89.0
147756 2015-03-20 03:00:00  90.0
147757 2015-03-20 03:15:00  92.0
147758 2015-03-20 03:30:00  97.0
147759 2015-03-20 03:45:00  95.0
147783 2015-03-20 09:45:00  94.0
147784 2015-03-20 10:00:00  87.0
147785 2015-03-20 10:15:00  72.0
147786 2015-03-20 10:30:00  61.0
147787 2015-03-20 10:45:00  65.0
147788 201

In [20]:
# Iterator for å lese store datasett effektivt
chunk_iter = pd.read_csv(f"{pathGermany}/data.csv", chunksize=1000)

# List comprehension for å hente MW < 400 (MW) med dato (dt)
filtered_data = [
    {'Dato': row['dt'], 'MW': row['MW']}
    for chunk in chunk_iter
    for _, row in chunk.query("MW < 400").iterrows()
]

# Konverter listen til en ryddig DataFrame
filtered_df = pd.DataFrame(filtered_data)

# Vis tabellen
print(filtered_df)



                      Dato      MW
0      2011-01-02 00:15:00   66.00
1      2011-01-02 00:30:00   70.00
2      2011-01-02 00:45:00   79.00
3      2011-01-02 01:00:00   79.00
4      2011-01-02 01:15:00   85.00
...                    ...     ...
43725  2021-12-22 00:15:00  248.50
43726  2021-12-22 00:30:00  260.55
43727  2021-12-22 00:45:00  283.06
43728  2021-12-22 01:00:00  308.77
43729  2021-12-22 01:15:00  342.49

[43730 rows x 2 columns]
