In [18]:
import pandas as pd
import matplotlib.pyplot as plt

In [6]:
file_path = 'jaar.txt'  # replace with actual file path
columns = ['STN', 'YYYYMMDD', 'DDVEC', 'FHVEC', 'FG', 'FHX', 'FHXH', 'FHN', 'FHNH', 'FXX', 'FXXH', 'TG', 'TN', 'TNH', 'TX', 'TXH', 'T10N', 'T10NH', 'SQ', 'SP', 'Q', 'DR', 'RH', 'RHX', 'RHXH', 'PG', 'PX', 'PXH', 'PN', 'PNH', 'VVN', 'VVNH', 'VVX', 'VVXH', 'NG', 'UG', 'UX', 'UXH', 'UN', 'UNH', 'EV24']

# Load data with relevant columns
df = pd.read_csv(file_path, header=None, names=columns, usecols=['YYYYMMDD', 'TX'])

# Ensure 'YYYYMMDD' column is treated as string (in case of any numeric type conversion)
df['YYYYMMDD'] = df['YYYYMMDD'].astype(str)

# Remove rows where 'YYYYMMDD' contains non-numeric or missing values
df = df[df['YYYYMMDD'].str.isdigit()]

# Convert date from YYYYMMDD to datetime
df['YYYYMMDD'] = pd.to_datetime(df['YYYYMMDD'], format='%Y%m%d', errors='coerce')

# Drop rows where date conversion failed (invalid dates)
df = df.dropna(subset=['YYYYMMDD'])

# Convert the 'TX' column to numeric (handling any non-numeric values by coercing them to NaN)
df['TX'] = pd.to_numeric(df['TX'], errors='coerce')

# Drop rows with missing or invalid temperature values (NaN)
df = df.dropna(subset=['TX'])

# Convert TX from 0.1 degrees Celsius to Celsius
df['TX'] = df['TX'] / 10.0

# Find the first and last dates when TX is greater than or equal to 20°C
first_20_deg_day = df[df['TX'] >= 20].iloc[0]
last_20_deg_day = df[df['TX'] >= 20].iloc[-1]

# Output the results
print(f"First day when temperature hit or exceeded 20°C: {first_20_deg_day['YYYYMMDD'].date()} with temperature {first_20_deg_day['TX']}°C")
print(f"Last day when temperature hit or exceeded 20°C: {last_20_deg_day['YYYYMMDD'].date()} with temperature {last_20_deg_day['TX']}°C")

First day when temperature hit or exceeded 20°C: 2024-04-06 with temperature 22.6°C
Last day when temperature hit or exceeded 20°C: 2024-09-22 with temperature 24.2°C


In [26]:
file_path = 'etmgeg.txt'  # Replace with your file path
df = pd.read_csv(file_path, sep='\t')

df.columns = df.columns.str.strip()

# Convert 'YYYYMMDD' column to datetime
df['YYYYMMDD'] = pd.to_datetime(df['YYYYMMDD'], format='%Y%m%d')

# Convert temperature (TX) from tenths of a degree Celsius to degrees Celsius
df['TX'] = df['TX'] / 10

# Filter for temperatures above 20°C
df_above_20 = df[df['TX'] > 20]

# Group by year and find the first and last date where temperature exceeds 20°C
result = df_above_20.groupby(df_above_20['YYYYMMDD'].dt.year).agg(
    first_above_20=('YYYYMMDD', 'first'),
    last_above_20=('YYYYMMDD', 'last')
).reset_index()

# Format the dates to show only month and day, but keep the year
result['first_above_20'] = result['first_above_20'].dt.strftime('%Y-%m-%d')
result['last_above_20'] = result['last_above_20'].dt.strftime('%Y-%m-%d')

# Create a summary table
summary_table = result[['YYYYMMDD', 'first_above_20', 'last_above_20']]
summary_table.columns = ['Year', 'First Day Above 20°C', 'Last Day Above 20°C']

# Convert to LaTeX table format
latex_table = summary_table.to_latex(index=False)

# Display the LaTeX table
print(latex_table)

\begin{tabular}{rll}
\toprule
Year & First Day Above 20°C & Last Day Above 20°C \\
\midrule
2010 & 2010-04-25 & 2010-10-04 \\
2011 & 2011-04-02 & 2011-10-03 \\
2012 & 2012-04-30 & 2012-10-22 \\
2013 & 2013-04-14 & 2013-10-22 \\
2014 & 2014-03-20 & 2014-10-19 \\
2015 & 2015-04-15 & 2015-09-12 \\
2016 & 2016-04-03 & 2016-09-28 \\
2017 & 2017-03-30 & 2017-10-19 \\
2018 & 2018-04-07 & 2018-10-17 \\
2019 & 2019-04-07 & 2019-10-13 \\
2020 & 2020-04-06 & 2020-10-21 \\
2021 & 2021-03-30 & 2021-09-27 \\
2022 & 2022-04-12 & 2022-10-30 \\
2023 & 2023-05-04 & 2023-10-13 \\
\bottomrule
\end{tabular}



In [16]:
import pandas as pd
import matplotlib.pyplot as plt

file_path = 'etmgeg_260.txt'  # Replace with your file path
df = pd.read_csv(file_path, sep='\t', skiprows=54)

# Clean column names
df.columns = df.columns.str.strip()

# Convert 'YYYYMMDD' column to datetime
if 'YYYYMMDD' in df.columns:
    df['YYYYMMDD'] = pd.to_datetime(df['YYYYMMDD'], format='%Y%m%d')

# Convert temperature (TX) from tenths of a degree Celsius to degrees Celsius
if 'TX' in df.columns:
    df['TX'] = df['TX'] / 10

# Filter for temperatures above 20°C
df_above_20 = df[df['TX'] > 20]

# Extract the year from the 'YYYYMMDD' column
df_above_20.loc[:, 'Year'] = df_above_20['YYYYMMDD'].dt.year

# Group by year and find the first and last date where temperature exceeds 20°C
result = df_above_20.groupby('Year').agg(
    first_above_20=('YYYYMMDD', 'first'),
    last_above_20=('YYYYMMDD', 'last')
).reset_index()

# Print the result
print(result)



KeyError: 'TX'