In [1]:
import pandas as pd

# First look at the raw export, read with pandas' default header handling.
df = pd.read_csv("Table_Hrly_ACRE.csv")

# Report the frame's dimensions and its column labels.
frame_shape = df.shape
column_labels = list(df.columns)
print("Shape (rows, columns):", frame_shape)
print("\nColumns:")
print(column_labels)

# Render the leading rows as plain text so every column is shown.
preview = df.head()
print("\nFirst 5 rows:")
print(preview.to_string())


Shape (rows, columns): (1364, 27)

Columns:
['Unnamed: 0', 'ACRE', 'ACRE.1', 'ACRE.2', 'ACRE.3', 'ACRE.4', 'ACRE.5', 'ACRE.6', 'ACRE.7', 'ACRE.8', 'ACRE.9', 'ACRE.10', 'ACRE.11', 'ACRE.12', 'ACRE.13', 'ACRE.14', 'ACRE.15', 'ACRE.16', 'ACRE.17', 'ACRE.18', 'ACRE.19', 'ACRE.20', 'ACRE.21', 'ACRE.22', 'ACRE.23', 'ACRE.24', 'ACRE.25']

First 5 rows:
            Unnamed: 0           ACRE               ACRE.1               ACRE.2             ACRE.3                 ACRE.4              ACRE.5                   ACRE.6                    ACRE.7            ACRE.8              ACRE.9          ACRE.10                 ACRE.11                  ACRE.12            ACRE.13                    ACRE.14            ACRE.15                    ACRE.16            ACRE.17                    ACRE.18             ACRE.19                     ACRE.20             ACRE.21        ACRE.22          ACRE.23          ACRE.24                      ACRE.25
0          Time (LST)   Air Temp (°F)  0.5 m Air Temp (°F)  1.5 m Air T

In [2]:
import pandas as pd

# Re-read the file taking the column names from the second line (header=1).
df = pd.read_csv("Table_Hrly_ACRE.csv", header=1)

print("Shape:", df.shape)
print("\nColumns:")
# One column label per output line.
print(*df.columns, sep="\n")


Shape: (1363, 27)

Columns:
Time (LST) 
Air Temp (°F)
0.5 m Air Temp (°F)
1.5 m Air Temp (°F)
3 m Air Temp (°F)
Relative Humidity (%)
Precipitation (in)
Accumulated Precip (in)
Solar Radiation (W / m²)
Wind Speed (mph)
Wind Direction (°)
Wind Gust (mph)
4" Bare Soil Temp (°F)
4" Grass Soil Temp (°F)
2" Soil Temp (°F)
2" Soil Water Content (%)
4" Soil Temp (°F)
4" Soil Water Content (%)
8" Soil Temp (°F)
8" Soil Water Content (%)
20" Soil Temp (°F)
20" Soil Water Content (%)
Inversion Strength
Max Inversion
Heat Index (°F)
Wind Chill (°F)
Barometric Pressure (in Hg)


In [5]:
# Column labels as loaded, before any clean-up.
print(df.columns.tolist())

# Remove leading/trailing whitespace from every label, then show the result.
stripped_labels = df.columns.str.strip()
df.columns = stripped_labels
print(stripped_labels.tolist())


['Time (LST) ', 'Air Temp (°F)', '0.5 m Air Temp (°F)', '1.5 m Air Temp (°F)', '3 m Air Temp (°F)', 'Relative Humidity (%)', 'Precipitation (in)', 'Accumulated Precip (in)', 'Solar Radiation (W / m²)', 'Wind Speed (mph)', 'Wind Direction (°)', 'Wind Gust (mph)', '4" Bare Soil Temp (°F)', '4" Grass Soil Temp (°F)', '2" Soil Temp (°F)', '2" Soil Water Content (%)', '4" Soil Temp (°F)', '4" Soil Water Content (%)', '8" Soil Temp (°F)', '8" Soil Water Content (%)', '20" Soil Temp (°F)', '20" Soil Water Content (%)', 'Inversion Strength', 'Max Inversion', 'Heat Index (°F)', 'Wind Chill (°F)', 'Barometric Pressure (in Hg)']
['Time (LST)', 'Air Temp (°F)', '0.5 m Air Temp (°F)', '1.5 m Air Temp (°F)', '3 m Air Temp (°F)', 'Relative Humidity (%)', 'Precipitation (in)', 'Accumulated Precip (in)', 'Solar Radiation (W / m²)', 'Wind Speed (mph)', 'Wind Direction (°)', 'Wind Gust (mph)', '4" Bare Soil Temp (°F)', '4" Grass Soil Temp (°F)', '2" Soil Temp (°F)', '2" Soil Water Content (%)', '4" Soil 

In [6]:
import pandas as pd
import numpy as np

# ------------------------------------------------
# 1. Load and use the correct header row
# ------------------------------------------------
# header=1: the first line of the file is not the real header; the
# descriptive column names are on the second line.
df = pd.read_csv("Table_Hrly_ACRE.csv", header=1)

# Strip whitespace from all column names (the raw time label is "Time (LST) ")
df.columns = df.columns.str.strip()

# ------------------------------------------------
# 2. Convert Time column to datetime
# ------------------------------------------------
# errors="coerce" turns unparseable timestamps into NaT instead of raising.
df["Time (LST)"] = pd.to_datetime(df["Time (LST)"], errors="coerce")

# ------------------------------------------------
# 3. Convert all measurement columns to numeric (except time)
# ------------------------------------------------
# Done before the row drop so that non-numeric placeholders (coerced to NaN
# here) are treated as missing by the checks below.
measurement_cols = [col for col in df.columns if col != "Time (LST)"]
df[measurement_cols] = df[measurement_cols].apply(pd.to_numeric, errors="coerce")

# ------------------------------------------------
# 4. Drop rows where ALL measurement fields are NaN
# ------------------------------------------------
# The check is restricted to the measurement columns: with a plain
# dropna(how="all") a row that still has a timestamp would never be dropped,
# even if it carries no readings at all.
df = df.dropna(subset=measurement_cols, how="all")

# ------------------------------------------------
# 5. Drop columns that are entirely NaN
# ------------------------------------------------
df = df.dropna(axis=1, how="all")

# ------------------------------------------------
# 6. Save cleaned dataset
# ------------------------------------------------
df.to_csv("Table_Hrly_ACRE_clean.csv", index=False)

print("Cleaning complete.")
print("New shape:", df.shape)
print("Saved as Table_Hrly_ACRE_clean.csv")


Cleaning complete.
New shape: (1363, 26)
Saved as Table_Hrly_ACRE_clean.csv


In [7]:
import pandas as pd

# Reload the cleaned file written by the previous step.
df = pd.read_csv("Table_Hrly_ACRE_clean.csv")

# Build the missing-value mask once and derive both reports from it.
missing_mask = df.isna()
missing_per_column = missing_mask.sum()

print("Any NaNs present?:", missing_mask.any().any())

print("\nNaN count per column:")
print(missing_per_column)


Any NaNs present?: True

NaN count per column:
Time (LST)                       0
Air Temp (°F)                    0
0.5 m Air Temp (°F)              0
1.5 m Air Temp (°F)              0
3 m Air Temp (°F)                0
Relative Humidity (%)            0
Precipitation (in)               0
Accumulated Precip (in)          0
Solar Radiation (W / m²)         2
Wind Speed (mph)                 0
Wind Direction (°)               0
Wind Gust (mph)                  0
4" Bare Soil Temp (°F)           0
4" Grass Soil Temp (°F)          0
2" Soil Temp (°F)                0
2" Soil Water Content (%)        0
4" Soil Temp (°F)                0
4" Soil Water Content (%)        0
8" Soil Temp (°F)                0
8" Soil Water Content (%)        0
20" Soil Temp (°F)               0
20" Soil Water Content (%)       0
Inversion Strength               0
Max Inversion                    0
Heat Index (°F)                936
Wind Chill (°F)               1362
dtype: int64
