In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
from plotly.figure_factory import create_table
import plotly.express as px
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the weather CSV into a DataFrame.
data = pd.read_csv('../input/weather-dataset-rattle-package/weatherAUS.csv')

# Build a Plotly table from the first ten rows and set every cell's font size to 7.
table = create_table(data.head(10))
for cell_annotation in table.layout.annotations:
    cell_annotation.font.size = 7

py.iplot(table, filename='table')

In [None]:
# Remove four columns, then discard every row whose RainToday value is missing.
dropped_columns = ['Evaporation', 'Sunshine', 'Cloud9am', 'Cloud3pm']
data = data.drop(columns=dropped_columns).dropna(subset=['RainToday'])

In [None]:
# Show the Python type of the DataFrame and of the Plotly table figure.
for label, obj in (('data type : ', data), ('table type : ', table)):
    print(label, type(obj))

In [None]:
# Print a summary of the frame: column names, non-null counts and dtypes.
data.info()

In [None]:
# Convert the Date column to datetime values, then order the rows by date.
data['Date'] = pd.to_datetime(data['Date'])
data = data.sort_values(by='Date')

**Quick Visualizations with Custom Bar Charts**

In [None]:
# Group once and reuse the grouper for both the per-column counts and the row counts.
rain_groups = data.groupby(["RainToday"])
df1 = rain_groups.count().reset_index()

# Store the number of rows per RainToday value as a named column so the bar
# heights come from df1 itself (and the axis is labelled) instead of from a
# separately computed, unnamed Series that is only matched by position.
# Both results are ordered by the sorted group key, so the positions line up.
df1["Observations"] = rain_groups.size().to_numpy()

fig = px.bar(
    df1,
    x="RainToday",
    y="Observations",
    color="RainToday",
)
fig.show()

In [None]:
# Keep only the rows for six selected locations.
location_filter = "Location in ('Canberra','Sydney','Perth','Darwin','Hobart','Brisbane')"
data_x = data.query(location_filter)

In [None]:
# Mean RISK_MM for every (RainToday, Location) pair, flattened back into columns.
risk_by_group = data_x.groupby(["RainToday", "Location"])['RISK_MM'].mean()
group_data = risk_by_group.reset_index()
group_data

In [None]:
# Bar chart of mean RISK_MM per location, coloured by RainToday.
fig = px.bar(
    group_data,
    x="Location",
    y="RISK_MM",
    color="RainToday",
)
fig.show()

In [None]:
# Spline-smoothed line chart of RISK_MM over time, one line per RainToday value,
# drawn with the SVG renderer.
fig = px.line(
    data,
    x="Date",
    y="RISK_MM",
    color="RainToday",
    line_shape="spline",
    render_mode="svg",
)
fig.show()

In [None]:
!pip install geopy 
!pip install Nominatim

In [None]:
from geopy.exc import GeocoderTimedOut 
from geopy.geocoders import Nominatim 
   
# Empty lists that will collect the longitude and latitude
# looked up for each location.
longitude, latitude = [], []
   
def findGeocode(city, max_retries=3):
    """Geocode ``city`` with Nominatim, retrying a bounded number of times.

    Parameters
    ----------
    city : str
        Place name passed to ``geolocator.geocode``.
    max_retries : int, optional
        How many additional attempts to make after the first one when the
        geocoder raises ``GeocoderTimedOut``. Defaults to 3.

    Returns
    -------
    The value returned by ``geolocator.geocode(city)`` (``None`` when the
    geocoder finds no match), or ``None`` if every attempt timed out.
    """
    # The user_agent must not be None; it identifies this application to the service.
    geolocator = Nominatim(user_agent="my_name")

    # A loop with a fixed number of attempts replaces unbounded recursion, which
    # would end in a RecursionError if the service kept timing out.
    for _attempt in range(max_retries + 1):
        try:
            return geolocator.geocode(city)
        except GeocoderTimedOut:
            continue

    # Every attempt timed out: report "not found" so the caller can store NaN.
    return None

In [None]:
# One row per distinct Location value.
loca = data[['Location']].drop_duplicates()
loca.head()

In [None]:
# Start from empty lists so re-running this cell cannot leave the coordinate
# lists longer than ``loca`` (which would break the column assignment later).
latitude = []
longitude = []

# Geocode each distinct location exactly once; every lookup is a network request.
for city in loca["Location"]:
    loc = findGeocode(city)

    if loc is not None:
        # Keep the coordinates returned by the geocoder.
        latitude.append(loc.latitude)
        longitude.append(loc.longitude)
    else:
        # No result for this location: record NaN as the missing-value marker.
        latitude.append(np.nan)
        longitude.append(np.nan)

In [None]:
# Add the collected coordinates to the locations frame, Longitude first.
for column, values in (("Longitude", longitude), ("Latitude", latitude)):
    loca[column] = values

In [None]:
# Preview the first rows of the locations frame with its coordinate columns.
loca.head()

In [None]:
# Left-join the coordinates onto every observation via the Location column.
data_left = data.merge(loca, on='Location', how='left')
data_left.head()

In [None]:
import datetime as dt
# Midpoint of the daily minimum and maximum temperature.
data_left['AvgTemp'] = (data_left['MinTemp'] + data_left['MaxTemp']) / 2

# Month label such as "Jan-2009". The vectorised .dt accessor formats the whole
# datetime column at once instead of calling strftime once per row via apply.
data_left['mont_of_year'] = data_left['Date'].dt.strftime('%b-%Y')
data_left.head()

In [None]:
from datetime import datetime
# Parse the "%b-%Y" month labels back into timestamps so they can be sorted
# chronologically. The explicit format matches how the labels were produced and
# avoids per-value format inference.
data_left['mont_of_year_formatted'] = pd.to_datetime(data_left['mont_of_year'], format='%b-%Y')

In [None]:
# Mean AvgTemp per location (with its coordinates) and month.
monthly_keys = ["Location", "Longitude", 'Latitude', 'mont_of_year', 'mont_of_year_formatted']
avg_temp_by_month = data_left.groupby(monthly_keys)['AvgTemp'].mean()
group_data2 = avg_temp_by_month.reset_index()
group_data2.head()

In [None]:
# Order the rows by the parsed month so the months appear chronologically.
group_data2 = group_data2.sort_values(by='mont_of_year_formatted')

In [None]:
# Animated density map of the station locations, one frame per month.
# The Stamen base maps are no longer served without a Stadia Maps account,
# so use the token-free "open-street-map" tiles instead of "stamen-terrain".
fig = px.density_mapbox(
    group_data2,
    lat='Latitude',
    lon='Longitude',
    radius=10,
    hover_name="Location",
    color_continuous_scale="Viridis",
    center=dict(lat=-30, lon=135),
    zoom=3,
    height=700,
    mapbox_style="open-street-map",
    animation_frame="mont_of_year",
)
fig.show()

In [None]:
# Animated scatter map: one marker per location, coloured and sized by AvgTemp,
# with one frame per month.
# NOTE(review): AvgTemp also drives the marker size; confirm it is never negative
# or NaN here, since marker sizes are expected to be non-negative numbers.
fig = px.scatter_mapbox(
    group_data2,
    lat="Latitude",
    lon="Longitude",
    hover_name="Location",
    color="AvgTemp",
    size="AvgTemp",
    color_continuous_scale=px.colors.sequential.matter,
    size_max=20,
    zoom=3,
    height=700,
    mapbox_style="open-street-map",
    animation_frame="mont_of_year",
)
# Remove the figure margins on all four sides.
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

In [None]:
# Mean RISK_MM per location, month and RainToday value, ordered by the parsed month.
risk_keys = ["Location", "Longitude", 'Latitude', 'mont_of_year', 'mont_of_year_formatted', 'RainToday']
mean_risk = data_left.groupby(risk_keys)['RISK_MM'].mean()
group_data5 = mean_risk.reset_index().sort_values(by='mont_of_year_formatted')

In [None]:
import plotly.express as px
# Animated scatter map of mean RISK_MM: marker size follows RISK_MM, marker
# colour follows RainToday, one frame per month.
fig = px.scatter_mapbox(
    group_data5,
    lat="Latitude",
    lon="Longitude",
    color="RainToday",
    size="RISK_MM",
    color_continuous_scale=px.colors.cyclical.IceFire,
    size_max=20,
    zoom=3,
    animation_frame="mont_of_year",
    mapbox_style="carto-positron",
)
fig.show()

In [None]:
# Mean MinTemp, MaxTemp and RISK_MM per location, month and RainToday value.
# Several columns must be selected from a groupby with a list (double brackets);
# selecting them with a bare tuple is rejected by pandas 2.0 and later.
group_data3 = (
    data_left
    .groupby(["Location", "Longitude", 'Latitude', 'mont_of_year', 'mont_of_year_formatted', 'RainToday'])
    [['MinTemp', 'MaxTemp', 'RISK_MM']]
    .mean()
    .reset_index()
)
group_data3.head()

In [None]:
# Keep six locations and order the rows by the parsed month. Sorting as part of
# the chain yields a new frame, instead of sorting a query() result in place,
# which pandas flags with SettingWithCopyWarning.
group_data33 = (
    group_data3
    .query("Location in ('Townsville','Albury','AliceSprings','Cairns','Ballarat','Newcastle')")
    .sort_values(by=['mont_of_year_formatted'])
)

In [None]:
# Animated MinTemp vs MaxTemp scatter, one facet column per location and one
# frame per month; marker size follows RISK_MM and colour follows RainToday.
# The x axis is logarithmic.
fig = px.scatter(
    group_data33,
    x="MinTemp",
    y="MaxTemp",
    animation_frame="mont_of_year",
    animation_group="Location",
    size="RISK_MM",
    color="RainToday",
    hover_name="Location",
    facet_col="Location",
    log_x=True,
    size_max=45,
)
fig.show()

In [None]:
# Monthly means of the 9am/3pm wind speed, humidity, pressure and temperature
# readings, ordered by the parsed month.
# The columns are selected with a list (double brackets); selecting several
# groupby columns with a bare tuple is rejected by pandas 2.0 and later.
group_data4 = (
    data_left
    .groupby(['mont_of_year', 'mont_of_year_formatted'])
    [['WindSpeed3pm', 'WindSpeed9am', 'Humidity9am', 'Humidity3pm',
      'Pressure9am', 'Pressure3pm', 'Temp9am', 'Temp3pm']]
    .mean()
    .reset_index()
    .sort_values(by=['mont_of_year_formatted'])
)
group_data4.head()

In [None]:
# Monthly mean wind speed at 9am and 3pm, one line per column.
wind_columns = ['WindSpeed9am', 'WindSpeed3pm']
fig = px.line(group_data4, x='mont_of_year', y=wind_columns)
fig.show()

In [None]:
# Monthly mean humidity at 9am and 3pm, one line per column.
humidity_columns = ['Humidity9am', 'Humidity3pm']
fig = px.line(group_data4, x='mont_of_year', y=humidity_columns)
fig.show()

In [None]:
# Monthly mean pressure at 9am and 3pm, one line per column.
pressure_columns = ['Pressure9am', 'Pressure3pm']
fig = px.line(group_data4, x='mont_of_year', y=pressure_columns)
fig.show()

In [None]:
# Monthly mean temperature at 9am and 3pm, one line per column.
temperature_columns = ['Temp9am', 'Temp3pm']
fig = px.line(group_data4, x='mont_of_year', y=temperature_columns)
fig.show()