# Predict the flowering date of this year's cherry blossoms by analyzing temperature data and past flowering information

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for folder, _, file_names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, file_name) for file_name in file_names):
        print(full_path)
# Any results you write to the current directory are saved as output.

# 1. Import training data

In [None]:
# Load the historical temperature / flowering records.
data_csv = pd.read_csv("/kaggle/input/temperature-and-flower-status/hirosaki_temp_cherry_bloom.csv")
df = pd.DataFrame(data_csv)

# Split date into year,month,day
# The 'date' column is split on '/'; the three pieces are stored as strings.
date_parts = df['date'].str.split('/', expand=True)
for position, column in enumerate(['year', 'month', 'day']):
    df[column] = date_parts[position]
df.info()

# 2. Fill in missing 'flower_status' values

In [None]:
# Numeric encoding of the flowering status:
# 0:Before blooming
# 1:Bloom
# 2:Full bloom
# 3:Scatter
status_codes = {'bloom': 1, 'full': 2, 'scatter': 3}

# The raw 'flower_status' column is only set on some rows. Every other row
# inherits the most recent status, and the status is reset to 0 on January 1.
new_df = []
# Start from "before blooming" so the first row always has a defined status,
# even when the data does not begin on January 1 or the cell is re-run.
status = 0
for year, month, day, temperature, flower_status in zip(
        df['year'], df['month'], df['day'], df['temperature'], df['flower_status']):
    if month == '1' and day == '1':
        status = 0
    else:
        # Unrecognised or missing values keep the previous status.
        status = status_codes.get(flower_status, status)
    new_df.append({'year': year, 'month': month, 'day': day,
                   'temperature': temperature, 'flower_status': status})
new_df = pd.DataFrame(new_df)
new_df

In [None]:
# Extract data from March 1 to May 31
# Rows are kept when their (string) month is '3', '4' or '5'; the 'year' column is dropped.
spring_months = ('3', '4', '5')
new_df_2 = pd.DataFrame([
    {'month': month_value, 'day': day_value, 'temperature': temp_value, 'flower_status': status_value}
    for month_value, day_value, temp_value, status_value in zip(
        new_df['month'], new_df['day'], new_df['temperature'], new_df['flower_status'])
    if month_value in spring_months
])
new_df_2

In [None]:
# Add the cumulative temperature to the column
# 'temp_accum' is the running sum of 'temperature', restarted on every March 1.
new_df_3 = []
# Initialise the accumulator so the loop does not depend on the first row being
# March 1 or on a value left over from another notebook cell.
temp_accum = 0
for month, day, temp, status in zip(
        new_df_2['month'], new_df_2['day'], new_df_2['temperature'], new_df_2['flower_status']):
    if month == '3' and day == '1':
        temp_accum = 0
    temp_accum += temp
    new_df_3.append({'month': month, 'day': day, 'temperature': temp,
                     'temp_accum': temp_accum, 'flower_status': status})
new_df_3 = pd.DataFrame(new_df_3)
new_df_3

# 3. Split the data into temperature features and flowering-status labels, then reshape

In [None]:
# Features: every column except 'flower_status'. Labels: the 'flower_status' column.
x = np.array(new_df_3.drop(columns=['flower_status']))
y = np.array(new_df_3[['flower_status']]).ravel()  # flatten (n, 1) to (n,)

# Cast to the numeric dtypes used for training.
data = x.astype(np.float32)
labels = y.astype(np.int32)

print('data shape:', data.shape) # (n, m)
print(data[:10])
print('labels shape:', labels.shape) # (n,)
print(labels[:10])

# 4. Learn with scikit-learn

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# scaling
# Each feature column is rescaled with a MinMaxScaler fitted on the full data array.
feature_scaler = MinMaxScaler()
features = feature_scaler.fit_transform(data)

# split data for train and test
# 30% of the rows are held out for testing.
x_train, x_test, y_train, y_test = train_test_split(features, labels.ravel(), test_size=0.3)

for subset in (x_train, x_test, y_train, y_test):
    print(subset)

In [None]:
from sklearn import neural_network

# Settings for the multi-layer perceptron classifier.
# Only max_iter and verbose differ from the defaults noted beside each entry.
mlp_settings = {
    'max_iter': 1000,         # default:200
    'activation': "relu",     # default:"relu"
    'solver': "adam",         # default:"adam"
    'alpha': 0.0001,          # default:0.0001
    'verbose': True,          # default:False
    'early_stopping': False,  # default:False
}
clf = neural_network.MLPClassifier(**mlp_settings)

In [None]:
# Train the classifier on the training split (scaled features and integer status labels).
clf.fit(x_train, y_train)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Predict the status of the held-out test rows.
predict = clf.predict(x_test)

# Show the true-vs-predicted counts as an annotated heat map.
cm = confusion_matrix(y_true=y_test, y_pred=predict)
sns.heatmap(data=cm, annot=True, cmap='Blues')
plt.show()

In [None]:
# Text report of the per-class metrics on the test split.
classReport = classification_report(y_true=y_test, y_pred=predict)
print(classReport)

# Overall accuracy; `score` is checked again when the final prediction is reported.
score = accuracy_score(y_true=y_test, y_pred=predict)
print('accuracy :', format(score, '.5f'))

# 5. Import data to predict

In [None]:
# data from Japan Meteorological Agency (Actual and 2-week forecast)
pre_csv = pd.read_csv('/kaggle/input/hirosaki-this-year/hirosaki_this_year.csv')
pre_df = pd.DataFrame(pre_csv)
# Split date into year,month,day
# The 'date' column is split on '/'; the three pieces are stored as strings.
date_parts = pre_df['date'].str.split('/', expand=True)
for position, column in enumerate(['year', 'month', 'day']):
    pre_df[column] = date_parts[position]
pre_df.info()

# 6. Predict future temperatures

In [None]:
# Fill in missing values by predicting future temperatures from differences between past average and this year

#--------------------------------------------------------
weight = 1
# Change as needed
# Usually 1
#--------------------------------------------------------

# Historical records (all rows of new_df) with Feb 29 cut.
is_leap_day = (new_df['month'] == '2') & (new_df['day'] == '29')
new_df_4 = new_df.loc[~is_leap_day, ['year', 'month', 'day', 'temperature']].reset_index(drop=True)

# Average temperature of past years for each calendar day, keyed by (month, day).
# Computed once instead of re-filtering the whole history for every row.
daily_mean = new_df_4.groupby(['month', 'day'])['temperature'].mean().to_dict()
no_history = float('nan')

# Difference between this year and the historical average, for the leading run
# of days that already have a temperature. Collection stops at the first
# missing value.
diffs = []
for m, d, pre_temp in zip(pre_df['month'], pre_df['day'], pre_df['temperature']):
    if m == '2' and d == '29':
        continue
    if pd.isnull(pre_temp):
        break
    diffs.append(pre_temp - daily_mean.get((m, d), no_history))
ary_diff = pd.DataFrame(diffs)

# Overall average of difference (a plain float; NaN entries are skipped).
diff_mean = pd.Series(diffs, dtype='float64').mean()
if pd.isnull(diff_mean):
    # No usable observation this year: fall back to the plain historical mean.
    diff_mean = 0.0

# Offset added to the historical mean, truncated to three decimals.
# Computed up front so `add` exists even when no temperature is missing.
add = int(diff_mean * weight * 1000) / 1000

pre_df_2 = []
for yr, m, d, actual in zip(pre_df['year'], pre_df['month'], pre_df['day'], pre_df['temperature']):
    if pd.isnull(actual):
        # Predicted temperature
        temperature = daily_mean.get((m, d), no_history) + add
    else:
        # Actual temperature
        temperature = actual
    # Keep only rows whose date parts are all positive numbers.
    if float(yr) > 0 and float(m) > 0 and float(d) > 0:
        pre_df_2.append({'year': yr, 'month': m, 'day': d, 'temperature': temperature})
pre_df_2 = pd.DataFrame(pre_df_2)
pre_df_2

In [None]:
# Show the offset that was added to the historical daily means when filling in
# this year's missing temperatures.
print('Average temperature rise:', add)

In [None]:
# Add the cumulative temperature to the column
# Each row is [month, day, temperature, temp_accum] for dates from March onward,
# where temp_accum is the running temperature sum over the kept rows.
new_pre_df = []
temp_accum = 0
for month, day, temperature in zip(pre_df_2['month'], pre_df_2['day'], pre_df_2['temperature']):
    if int(month) >= 3:
        temp_accum += temperature
        # month/day are strings; store numbers so the array below is numeric
        # rather than a unicode string array.
        new_pre_df.append([int(month), int(day), float(temperature), temp_accum])
x = np.array(new_pre_df, dtype='float64')
x

# 7. Predict blossoming

In [None]:
# scaling
# Scale with the min/max of the training features (`data`), not of this year's
# rows, so the classifier sees values on the same scale it was trained on.
x = MinMaxScaler().fit(data).transform(x.astype('float64'))

result = clf.predict(x)
print(x)
print(result)

In [None]:
# If the status returns or jumps, it is judged as an error
# Each status (0..3) must be entered exactly once over the predicted period.
import datetime

count_bud, count_bloom, count_full, count_scatter = 0, 0, 0, 0
date_bloom, date_full, date_scatter = 'none', 'none', 'none'
year = pre_df_2['year'][0]
for i, (row, status_now) in enumerate(zip(new_pre_df, result)):
    if i == 0:
        # The first day is counted as "before blooming" whatever was predicted.
        count_bud += 1
        continue
    if status_now == result[i - 1]:
        continue
    # The status changed on this day: count the new status and remember the date.
    date_text = '{}/{}/{}'.format(int(year), int(row[0]), int(row[1]))
    if status_now == 0:
        count_bud += 1
    elif status_now == 1:
        count_bloom += 1
        date_bloom = date_text
    elif status_now == 2:
        count_full += 1
        date_full = date_text
    elif status_now == 3:
        count_scatter += 1
        date_scatter = date_text

status_counts = (count_bud, count_bloom, count_full, count_scatter)
if score > 0.95:
    if all(count == 1 for count in status_counts):
        print('Congratulations, the prediction is successful !!')
        print('Bloom  :', date_bloom)
        print('Full   :', date_full)
        print('Scatter:', date_scatter)
        print('Accuracy score:', '{:.3f}'.format(score))
        # Time stamp (taken in UTC so it matches the label regardless of the machine's time zone)
        today = datetime.datetime.now(datetime.timezone.utc)
        print(today.strftime('Time stamp: %Y/%m/%d %H:%M:%S (UTC)'))
    else:
        # Report a missing status only when one was never entered.
        if any(count == 0 for count in status_counts):
            print('ERROR !! (Missing status)')
        if count_bud > 1:
            print('ERROR !! (Over count "Before blooming :', count_bud, '")')
        if count_bloom > 1:
            print('ERROR !! (Over count "Bloom :', count_bloom, '")')
        if count_full > 1:
            print('ERROR !! (Over count "Full :', count_full, '")')
        if count_scatter > 1:
            print('ERROR !! (Over count "Scatter :', count_scatter, '")')
else:
    print('Low accuracy score : ', '{:.3f}'.format(score))