Skip to content

Commit

Permalink
Added K-Means clustering method in YearlyRainfall class
Browse files Browse the repository at this point in the history
  • Loading branch information
paul-florentin-charles committed Jul 24, 2023
1 parent 7bb28dc commit e163512
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 11 deletions.
34 changes: 28 additions & 6 deletions src/classes/yearly_rainfall.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from scipy import signal
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.cluster import KMeans

import src.config as cfg
from src.decorators import plots
Expand All @@ -29,7 +30,7 @@ def __init__(self,
self.starting_year: int = cfg.get_start_year() \
if start_year is None \
else start_year
self.round_precision: int = cfg.get_round_precision() \
self.round_precision: int = cfg.get_rainfall_precision() \
if round_precision is None \
else round_precision
self.yearly_rainfall: pd.DataFrame = self.load_yearly_rainfall() \
Expand Down Expand Up @@ -209,7 +210,7 @@ def add_linear_regression(self) -> (float, float):
years: np.ndarray = self.yearly_rainfall[Label.YEAR.value].values.reshape(-1, 1)
rainfalls: np.ndarray = self.yearly_rainfall[Label.RAINFALL.value].values

reg = LinearRegression()
reg: LinearRegression = LinearRegression()
reg.fit(years, rainfalls)
self.yearly_rainfall[Label.LINEAR_REGRESSION.value] = reg.predict(years)
self.yearly_rainfall[Label.LINEAR_REGRESSION.value] = round(
Expand All @@ -235,6 +236,19 @@ def add_savgol_filter(self) -> None:
self.yearly_rainfall[Label.SAVITZKY_GOLAY_FILTER.value] = round(
self.yearly_rainfall[Label.SAVITZKY_GOLAY_FILTER.value], self.round_precision)

def add_kmeans(self) -> None:
    """
    Compute K-Means clustering of Rainfall according to Year
    and add the resulting cluster labels to our pandas DataFrame.

    Each sample given to the clustering is a (Year, Rainfall) pair.
    The number of clusters comes from cfg.get_kmeans_clusters() and
    the labels are stored in the Label.KMEANS.value column.

    :return: None
    """
    fit_data: np.ndarray = self.yearly_rainfall[[Label.YEAR.value, Label.RAINFALL.value]].values

    # NOTE(review): no random_state is passed, so cluster numbering
    # may differ from one run to the next -- confirm this is acceptable.
    kmeans: KMeans = KMeans(n_init=10, n_clusters=cfg.get_kmeans_clusters())

    # fit_predict fits the model and labels the training samples in a
    # single call, instead of a separate predict pass over the same data.
    self.yearly_rainfall[Label.KMEANS.value] = kmeans.fit_predict(fit_data)

@plots.legend_and_show()
def plot_rainfall(self, title: Optional[str] = None) -> None:
"""
Expand All @@ -244,7 +258,7 @@ def plot_rainfall(self, title: Optional[str] = None) -> None:
:return: None
"""
for column_label in self.yearly_rainfall.columns[1:]:
if column_label == Label.PERCENTAGE_OF_NORMAL.value:
if column_label in [Label.PERCENTAGE_OF_NORMAL.value, Label.KMEANS.value]:
continue

plt.plot(self.yearly_rainfall[Label.YEAR.value],
Expand All @@ -268,9 +282,17 @@ def plot_normal(self, title: Optional[str] = None) -> None:
return

plt.axhline(y=100.0, color='orange', linestyle='dashed', label='Normal')
plt.scatter(self.yearly_rainfall[Label.YEAR.value],
self.yearly_rainfall[Label.PERCENTAGE_OF_NORMAL.value],
label=Label.PERCENTAGE_OF_NORMAL.value)
if Label.KMEANS.value not in self.yearly_rainfall.columns:
plt.scatter(self.yearly_rainfall[Label.YEAR.value],
self.yearly_rainfall[Label.PERCENTAGE_OF_NORMAL.value],
label=Label.PERCENTAGE_OF_NORMAL.value)
else:
year_rain: pd.DataFrame = self.yearly_rainfall
for label_value in range(cfg.get_kmeans_clusters()):
year_rain = year_rain[year_rain[Label.KMEANS.value] == label_value]
plt.scatter(year_rain[Label.YEAR.value],
year_rain[Label.PERCENTAGE_OF_NORMAL.value])
year_rain = self.yearly_rainfall

if title is not None:
plt.title(title)
Expand Down
18 changes: 16 additions & 2 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def get_start_year() -> int:
return start_year


def get_round_precision() -> int:
def get_rainfall_precision() -> int:
"""
The decimal precision of Rainfall values.
Expand All @@ -51,6 +51,20 @@ def get_round_precision() -> int:
rounding_precision: int = 0
with open(CONFIG_FILE_PATH, mode=MODE, encoding=UTF_8) as stream:
yaml_config: dict = safe_load(stream)
rounding_precision += yaml_config['data']['round_precision']
rounding_precision += yaml_config['data']['rainfall_precision']

return rounding_precision


def get_kmeans_clusters() -> int:
    """
    Read the number of K-Means clusters from the YAML configuration file.

    The value is looked up under the 'data' section,
    at the 'kmeans_clusters' key.

    :return: A number of clusters as an Integer.
    """
    with open(CONFIG_FILE_PATH, mode=MODE, encoding=UTF_8) as config_stream:
        data_section: dict = safe_load(config_stream)['data']
        configured_clusters = data_section['kmeans_clusters']

    # Adding to 0 keeps the accumulate-into-an-int form of the sibling
    # getters: a non-numeric configured value raises TypeError here.
    return 0 + configured_clusters
3 changes: 2 additions & 1 deletion src/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ dataset:

data:
start_year: 1970
round_precision: 2
rainfall_precision: 2
kmeans_clusters: 4
1 change: 1 addition & 0 deletions src/enums/labels.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ class Label(str, Enum):
PERCENTAGE_OF_NORMAL: str = 'Percentage of normal'
LINEAR_REGRESSION: str = 'Linear regression'
SAVITZKY_GOLAY_FILTER: str = 'Savitzky–Golay filter'
KMEANS: str = 'K-Means'
8 changes: 6 additions & 2 deletions src/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,15 @@ def run() -> None:
by_year: bool = False
by_month: bool = False

month: Month = Month.JULY
season: Season = Season.SUMMER

if by_year:
yearly_rainfall: YearlyRainfall = YearlyRainfall()
elif by_month:
yearly_rainfall: MonthlyRainfall = MonthlyRainfall(Month.JANUARY)
yearly_rainfall: MonthlyRainfall = MonthlyRainfall(month)
else:
yearly_rainfall: SeasonalRainfall = SeasonalRainfall(Season.WINTER)
yearly_rainfall: SeasonalRainfall = SeasonalRainfall(season)

avg_1970_2000 = yearly_rainfall.get_average_yearly_rainfall(1970, 2000)
avg_1980_2010 = yearly_rainfall.get_average_yearly_rainfall(1980, 2010)
Expand All @@ -45,6 +48,7 @@ def run() -> None:
print("Slope (in mm/year):", slope)

yearly_rainfall.add_savgol_filter()
yearly_rainfall.add_kmeans()

yearly_rainfall.plot_rainfall()
yearly_rainfall.plot_normal()
Expand Down

0 comments on commit e163512

Please sign in to comment.