This is an AI chatbot named Lee that provides weather information for tourists.<br>
Authors: <br>
<ul>
<li>Foh Wei Lian Willian 0205930 0205930@student.kdupg.edu.my</li>
<li>Alvin Khoo Chun Yan 0206002 0206002@student.kdupg.edu.my</li>
<li>Chuah Jing Quan 0206012 0206012@student.kdupg.edu.my</li>
</ul>
<br>
Functions:
<ul>
<li>Get weather dataset through web scraping</li>
<li>Perform machine learning to predict the weather</li>
<li>Hold a conversation with the user (NLP)</li>
</ul>


In [None]:
#install required library
!pip install -U scikit-fuzzy

#import all libraries used
import pandas as pd
import numpy as np
import skfuzzy as fuzz
from skfuzzy import control as ctrl

import random
import datetime
import re
import string
from nltk.chat.util import Chat, reflections
import nltk
from nltk import word_tokenize, pos_tag, ne_chunk
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('maxent_ne_chunker')
nltk.download('words')
nltk.download('wordnet')
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package maxent_ne_chunker is already up-to-date!
[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Package words is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
#load the weather dataset and derive the columns used by the predictor
#rows that contain NaN are discarded straight after loading
weather_dataset = pd.read_csv('weatherData.csv').dropna()

#reduce every start date to its month number (second '/'-separated field)
months = [int(start_date.split('/')[1]) for start_date in weather_dataset['startDate']]
weather_dataset['Month'] = months

#average temperature is the midpoint of the minimum and maximum temperature
weather_dataset['Avg Temp'] = weather_dataset['Min Temp'].add(weather_dataset['Max Temp']).div(2)

#the raw date columns are not needed once Month exists
weather_dataset = weather_dataset.drop(columns=["startDate", "endDate"])


In [None]:
#get average from dataset based on month and location
def getPrediction(month:int,location:str)->pd.Series:
  """Return the column-wise mean of the dataset rows for one month and state.

  Args:
    month: month number compared against the 'Month' column.
    location: state name compared against the 'State' column.

  Returns:
    A pandas Series indexed by numeric column name holding the mean of the
    matching rows.  When no row matches, the means are NaN.
  """
  is_month = weather_dataset['Month'] == month
  is_state = weather_dataset['State'] == location
  filtered_data = weather_dataset.loc[is_month & is_state]
  #'State' holds text; without numeric_only recent pandas versions raise
  #TypeError instead of silently skipping the non-numeric column
  return filtered_data.mean(numeric_only=True)

In [None]:
#compute a rain index with a fuzzy control system
def get_rain_index(total_precipitation: int,max_precipitation: int, rain_day: int) -> float:
  """Defuzzify three rainfall measurements into one rain index.

  A scikit-fuzzy control system is built on every call.  Its antecedents are
  total precipitation, maximum daily precipitation and number of rain days;
  its consequent 'rain_index' is defined on the universe 0..9.

  Args:
    total_precipitation: value fed to the 'total_precipitation' antecedent.
    max_precipitation: value fed to the 'max_precipitation' antecedent.
    rain_day: value fed to the 'rain_day' antecedent.

  Returns:
    The defuzzified 'rain_index' output of the simulation.
  """
  #universe bounds come from the observed range in the dataset
  total_series = weather_dataset['Total precipitation(mm) ']
  daily_max_series = weather_dataset['Max daily precipitation ']
  total_lo, total_hi = total_series.min(), total_series.max()
  daily_lo, daily_hi = daily_max_series.min(), daily_max_series.max()

  #fuzzy variables (np.arange leaves the stop value out of each universe)
  total_var = ctrl.Antecedent(np.arange(total_lo, total_hi, 1), 'total_precipitation')
  daily_var = ctrl.Antecedent(np.arange(daily_lo, daily_hi, 1), 'max_precipitation')
  days_var = ctrl.Antecedent(np.arange(0, 32, 1), 'rain_day')
  index_var = ctrl.Consequent(np.arange(0, 10, 1), 'rain_index')

  #triangular membership functions: 'low' peaks at the minimum,
  #'high' peaks at the maximum, and the two overlap between 40% and 60% of the maximum
  total_var['low'] = fuzz.trimf(total_var.universe, [total_lo, total_lo, total_hi * 0.6])
  total_var['high'] = fuzz.trimf(total_var.universe, [total_hi * 0.4, total_hi, total_hi])

  daily_var['low'] = fuzz.trimf(daily_var.universe, [daily_lo, daily_lo, daily_hi * 0.6])
  daily_var['high'] = fuzz.trimf(daily_var.universe, [daily_hi * 0.4, daily_hi, daily_hi])

  days_var['low'] = fuzz.trimf(days_var.universe, [0, 0, 15])
  days_var['high'] = fuzz.trimf(days_var.universe, [10, 31, 31])

  index_var['sunny'] = fuzz.trimf(index_var.universe, [0, 0, 3])
  index_var['rainy'] = fuzz.trimf(index_var.universe, [2, 5, 8])
  index_var['heavy'] = fuzz.trimf(index_var.universe, [6, 9, 9])

  #rule base:
  #  1. low total precipitation and few rain days             -> sunny
  #  2. high total precipitation and many rain days           -> heavy
  #  3. many rain days with low total or low daily maximum    -> rainy
  rules = [
      ctrl.Rule(total_var['low'] & days_var['low'], index_var['sunny']),
      ctrl.Rule(total_var['high'] & days_var['high'], index_var['heavy']),
      ctrl.Rule(days_var['high'] & (total_var['low'] | daily_var['low']), index_var['rainy']),
  ]

  #run the simulation and read back the defuzzified output
  simulation = ctrl.ControlSystemSimulation(ctrl.ControlSystem(rules))
  simulation.inputs({
      'total_precipitation': total_precipitation,
      'max_precipitation': max_precipitation,
      'rain_day': rain_day,
  })
  simulation.compute()
  return simulation.output['rain_index']

In [None]:
#classify the weather of a state in a given month with fuzzy logic
def Weather_Fuzzy(month: int, location: str)->dict:
  """Describe the expected weather for a state in a given month.

  The averaged record from getPrediction supplies the temperature and the
  rainfall figures; get_rain_index turns the rainfall figures into a rain
  index, and a second fuzzy control system combines temperature and rain
  index into a weather index.

  Args:
    month: month number passed on to getPrediction.
    location: state name passed on to getPrediction.

  Returns:
    A dict with 'weather_condition' ('good weather' or 'bad weather') and
    'rain_condition' ('sunny', 'rainy' or 'rain heavily').
  """
  averages = getPrediction(month,location)
  avg_temperature = averages['Avg Temp']
  rain_index = get_rain_index(
      averages['Total precipitation(mm) '],
      averages['Max daily precipitation '],
      averages['Rain days'],
  )

  #fuzzy variables; the temperature universe spans the dataset's observed range
  temp_lo = weather_dataset['Avg Temp'].min()
  temp_hi = weather_dataset['Avg Temp'].max()
  temp_var = ctrl.Antecedent(np.arange(temp_lo, temp_hi, 1), 'temperature')
  rain_var = ctrl.Antecedent(np.arange(0, 10, 1), 'rain')
  weather_var = ctrl.Consequent(np.arange(0, 10, 1), 'weather')

  #triangular membership functions
  #NOTE(review): the 'cold' triangle presumably needs temp_lo <= temp_hi * 0.6
  #to be a valid ordering - confirm against the dataset
  temp_var['cold'] = fuzz.trimf(temp_var.universe, [temp_lo, temp_lo, temp_hi * 0.6])
  temp_var['warm'] = fuzz.trimf(temp_var.universe, [temp_hi * 0.4, temp_hi * 0.6, temp_hi * 0.8])
  temp_var['hot'] = fuzz.trimf(temp_var.universe, [temp_hi * 0.6, temp_hi, temp_hi])

  rain_var['sunny'] = fuzz.trimf(rain_var.universe, [0, 0, 3])
  rain_var['rainy'] = fuzz.trimf(rain_var.universe, [2, 5, 8])
  rain_var['heavy'] = fuzz.trimf(rain_var.universe, [6, 9, 9])

  weather_var['good'] = fuzz.trimf(weather_var.universe, [4, 10, 10])
  weather_var['bad'] = fuzz.trimf(weather_var.universe, [0, 0, 6])

  #rule base:
  #  1. sunny            -> good weather
  #  2. heavy rain       -> bad weather
  #  3. rainy and cold   -> bad weather
  #  4. rainy and hot    -> bad weather
  #  5. rainy and warm   -> good weather
  rules = [
      ctrl.Rule(rain_var['sunny'], weather_var['good']),
      ctrl.Rule(rain_var['heavy'], weather_var['bad']),
      ctrl.Rule(temp_var['cold'] & rain_var['rainy'], weather_var['bad']),
      ctrl.Rule(temp_var['hot'] & rain_var['rainy'], weather_var['bad']),
      ctrl.Rule(temp_var['warm'] & rain_var['rainy'], weather_var['good']),
  ]

  #run the simulation and read back the defuzzified weather index
  simulation = ctrl.ControlSystemSimulation(ctrl.ControlSystem(rules))
  simulation.inputs({'temperature': avg_temperature, 'rain': rain_index})
  simulation.compute()
  weather_index = simulation.output['weather']

  #translate both indexes into the labels consumed by give_suggestion
  weather_condition = 'good weather' if weather_index > 6 else 'bad weather'
  rain_condition = (
      'sunny' if rain_index < 3
      else 'rainy' if rain_index < 7
      else 'rain heavily'
  )

  return {'weather_condition': weather_condition, 'rain_condition': rain_condition}

In [None]:
#Get location from sentence
def extractLocation(sentence : str) -> list:
  """Return the candidate place names mentioned in a sentence.

  Two names are detected directly on the tokens: 'Negeri Sembilan', and
  'Kuala Lumpur', which is reported as 'Selangor' because the dataset has no
  Kuala Lumpur entry.  Every other candidate comes from NLTK's named-entity
  chunker.

  Args:
    sentence: the user's sentence; the caller capitalises each word first.

  Returns:
    A list of unique candidate names in order of first appearance.  The
    caller is expected to filter it against the states it knows.
  """
  tokenized = word_tokenize(sentence)
  location = []

  #independent checks: a sentence naming both places must yield both
  if 'Negeri' in tokenized and 'Sembilan' in tokenized:
    location.append('Negeri Sembilan')
  #special case as dataset didnt have Kuala Lumpur
  if 'Kuala' in tokenized and 'Lumpur' in tokenized:
    location.append('Selangor')

  chunked = nltk.ne_chunk(nltk.pos_tag(tokenized))
  for node in chunked:
    #plain (word, tag) tuples have no label; only chunk subtrees do
    if not hasattr(node, 'label'):
      continue
    #PERSON is accepted too - presumably because the chunker often labels
    #place names as persons; verify before tightening this filter
    if node.label() not in ('GPE', 'PERSON'):
      continue
    entity_name = ' '.join(leaf[0] for leaf in node.leaves())
    #a capitalised "Visit" can be glued onto the entity, e.g. "Visit Penang"
    if "Visit" in entity_name:
      entity_name = entity_name.replace("Visit ", "")
    location.append(entity_name)

  #drop duplicates (e.g. the special case plus the same chunker hit) while
  #keeping order, so the caller does not report one place twice
  return list(dict.fromkeys(location))

In [None]:
def get_month():
    """Ask the user for a month until one is recognised and return its number.

    Each whitespace-separated word of the answer is compared, ignoring case
    and surrounding punctuation, with the accepted spellings of every month:
    the three-letter abbreviation, the full English name and the month number.
    The first word that matches decides the result.

    Returns:
        The month number, an int from 1 to 12.
    """
    month_aliases = {
        1: ('JAN', 'JANUARY', '1'),
        2: ('FEB', 'FEBRUARY', '2'),
        3: ('MAR', 'MARCH', '3'),
        4: ('APR', 'APRIL', '4'),
        5: ('MAY', '5'),
        6: ('JUN', 'JUNE', '6'),
        7: ('JUL', 'JULY', '7'),
        8: ('AUG', 'AUGUST', '8'),
        9: ('SEP', 'SEPTEMBER', '9'),
        10: ('OCT', 'OCTOBER', '10'),
        11: ('NOV', 'NOVEMBER', '11'),
        12: ('DEC', 'DECEMBER', '12'),
    }
    # Inverted once so every word needs a single lookup instead of a scan.
    alias_to_month = {
        alias: number
        for number, aliases in month_aliases.items()
        for alias in aliases
    }

    while True:
        user_input = input("     Which month do you want visit the place ?\nYou :")
        for word in user_input.split():
            # Strip punctuation so answers such as "June." or "Dec," still match.
            number = alias_to_month.get(word.strip('.,!?;:').upper())
            if number is not None:
                return number
        print("Lee: month is invalid, please try again")

In [None]:
def modifyUserRespond(text):
    """Normalise a user message into a string of stemmed keywords.

    Every character that is not an ASCII letter becomes a space, the text is
    lower-cased and split into words, English stopwords are removed and the
    remaining words are stemmed with the Porter stemmer.

    Args:
        text: the raw user message.

    Returns:
        The stemmed words joined by single spaces.
    """
    words = re.sub('[^a-zA-Z]', ' ', text).lower().split()

    # Built once, not once per word; 'not' is kept in the text, presumably
    # so that negation survives the filtering.
    ignored_words = set(stopwords.words('english'))
    ignored_words.discard('not')

    stemmer = PorterStemmer()
    return ' '.join(stemmer.stem(word) for word in words if word not in ignored_words)

In [None]:
def give_suggestion(place:str,weather:dict):
  """Print Lee's travel advice for a place.

  Args:
    place: name of the place, inserted into the message.
    weather: dict with the keys 'weather_condition' ('good weather' or
      'bad weather') and 'rain_condition' ('sunny', 'rainy' or
      'rain heavily').

  Nothing is printed when either key is missing or when the pair of values
  is not one of the six known combinations.
  """
  if 'weather_condition' not in weather or 'rain_condition' not in weather:
    return

  #every message starts with exactly one space so the place name is followed
  #by a single space (print's separator used to add a second one)
  advice_by_condition = {
      ('good weather', 'sunny'):
          ' has a good weather, that is a good time to visit there.',
      ('good weather', 'rainy'):
          ' has a good weather, however there might be rainy day, make sure you prepare your umbrella or raining coat if you going to visit there.',
      #previously unhandled: no advice was printed for this combination
      ('good weather', 'rain heavily'):
          ' has good weather overall, but heavy rain is expected, so please bring an umbrella or a raincoat if you visit there.',
      #previously unhandled: no advice was printed for this combination
      ('bad weather', 'sunny'):
          ' should see little rain, but the weather is still not ideal at that time, so please plan your visit carefully.',
      ('bad weather', 'rainy'):
          ' might be rainy, you can still visit there but please bring along your umbrella or raining coat.',
      ('bad weather', 'rain heavily'):
          ' has a bad weather on that time. I suggest you to replan your schedule if you are going to visit there at that time.',
  }

  advice = advice_by_condition.get((weather['weather_condition'], weather['rain_condition']))
  if advice is not None:
    print('Lee: ' + place + advice)


In [None]:
#setup the response patterns the chatbot uses for small talk
#each pair is [regular expression, list of candidate replies]; the first
#pattern that matches the start of the user's message wins
pairs = [
    [
        r"my name is (.*)",
        ["Hello %1, How are you today ?"]
    ],
    [
        #whole-word greeting only, at the start or after whitespace, so that
        #words such as "hill" or "hiking" are not mistaken for "hi"
        r"(?:.*\s)?(?:hi|hello)\b",
        ["Hello", "Hey there","Nice to meet you"]
    ],
    [
        #"... your name" anywhere or a message starting with "name"; the old
        #unescaped "?" made the final "e" optional instead of matching "?"
        r"(.*) your name\b|name\b",
        ["I am Lee, I am your guide on Malaysia weather information."]
    ],
    [
        #"thank" alone is a terminate keyword, so it must get a farewell here
        #as well ("thank", "thanks", "thank you")
        r"(quit|bye|see you|thanks?\b|nothing|exit)",
        ["Bye take care. See you again :) ","It was nice talking to you. See you again :)","I am glad to help you"]
    ]
]

#replies used when no known state can be found in the user's message
unknown = ('Sorry, I dont understand where the location is, is that a state in Malaysia?', 'Sorry, I am not clear on the place, is that a good place to visit?', 'The place is quite new for me, maybe I will consider visit there next time')
#a message containing any of these words ends the conversation
terminate_keyword = ["quit","bye","nothing","exit","thank"]
#states present in the dataset; extracted locations are filtered against these
location = weather_dataset['State'].unique()
month = None
destination = []

chat = Chat(pairs, reflections)

In [None]:
#Main Program
#Conversation loop: small talk is answered by the pattern matcher, anything
#else is searched for a known state, and a terminate keyword ends the chat.
flag = True
print("##......##.########.##........######...#######..##.....##.########")
print("##..##..##.##.......##.......##....##.##.....##.###...###.##......")
print("##..##..##.##.......##.......##.......##.....##.####.####.##......")
print("##..##..##.######...##.......##.......##.....##.##.###.##.######..")
print("##..##..##.##.......##.......##.......##.....##.##.....##.##......")
print(" #..##..##.##.......##.......##....##.##.....##.##.....##.##......")
print(" ###..###..########.########..######...#######..##.....##.########\n\n")
print("Lee: Hi, I am Lee, I can guide you for the weather in Malaysia.")
print("     May I know where you are going to visit?")

while flag:
  user_response = input("You: ")

  #compare lower-cased tokens so "Bye" or "QUIT" end the chat as well
  user_tokens = [token.lower() for token in nltk.word_tokenize(user_response)]
  terminate = any(token in terminate_keyword for token in user_tokens)

  #ask the pattern matcher once: replies are chosen at random, so a second
  #call could print a different reply than the one that was tested
  reply = chat.respond(user_response)

  if terminate:
    #no pattern may match (e.g. "ok bye"); fall back to a fixed farewell
    #instead of concatenating None
    if reply is None:
      reply = "Bye take care. See you again :) "
    print("Lee: " + reply)
    flag = False
  elif reply is not None:
    print("Lee: " + reply)
  else:
    #capitalise every word before extracting candidate place names
    destination = extractLocation(string.capwords(user_response))
    #keep only the states that exist in the dataset
    destination = list(filter(lambda x: x in location,destination))
    if len(destination) != 0:
      print("Lee: " + "Let me check it for you.")
      month = get_month()
      for place in destination:
        weather = Weather_Fuzzy(month,place)
        give_suggestion(place,weather)
      #asked once after all places have been reported
      print("     Is there any other places you are going to visit? (type quit to end)")
    else:
      print("Lee: " + random.choice(unknown))

##......##.########.##........######...#######..##.....##.########
##..##..##.##.......##.......##....##.##.....##.###...###.##......
##..##..##.##.......##.......##.......##.....##.####.####.##......
##..##..##.######...##.......##.......##.....##.##.###.##.######..
##..##..##.##.......##.......##.......##.....##.##.....##.##......
 #..##..##.##.......##.......##....##.##.....##.##.....##.##......
 ###..###..########.########..######...#######..##.....##.########


Lee: Hi, I am Lee, I can guide you for the weather in Malaysia.
     May I know where you are going to visit?
Lee: Let me check it for you.
Lee: Terengganu  has a good weather, that is a good time to visit there.
     Is there any other places you are going to visit? (type quit to end)
Lee: Sorry, I dont understand where the location is, is that a state in Malaysia?
Lee: The place is quite new for me, maybe I will consider visit there next time
Lee: Sorry, I dont understand where the location is, is that a state in Malaysia