In [1]:
import pymongo
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

# Creating the Connections

In [2]:
# create the connection to MongoDB
# create an instance of the database and collections

conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

db = client.australia_fire_db
bushfiresbyState = db.bushfiresbyState
historicalFires = db.historicalFires

In [3]:
# urls to scrape

byStateurl = 'https://en.wikipedia.org/wiki/2019%E2%80%9320_Australian_bushfire_season'
historicalurl = 'https://en.wikipedia.org/wiki/List_of_major_bushfires_in_Australia'

In [4]:
# request the html using beautiful soup

historical_response = requests.get(historicalurl)
bystate_response = requests.get(byStateurl)

In [5]:
# parse the html text

h_soup = BeautifulSoup(historical_response.text, 'html.parser')
bs_soup = BeautifulSoup(bystate_response.text, 'html.parser')

# Historical Bushfire Scrapping

In [6]:
# the table headers are within the table body in the first two rows

h_table = h_soup.find('table', class_="wikitable")
h_table_headers = h_soup.find_all("tr")[0:2]

In [7]:
# scrape the table headers and append them to the headers list

h_headers = []
for table_header in h_table_headers:
    try:
        value = table_header.text
#         value = value.split("\n")
        h_headers.append(value)
        
    except AttributeError as e:
        print(e)
        
print(h_headers)
print(len(h_headers))

['\nDate\n\nName or description\n\nState(s) /territories\n\nArea burned (approx.)\n\nFatalities\n\nProperties damaged\n\nNotes\n', '\nha\n\nacres\n\nHomes(destroyed)\n\nOther buildings\n\nOther damage\n']
2


In [8]:
# manually create the headers list, because the scraped table headers was too complicated
# if there is time, create logic to put the two header rows into one similar to the list below

h_headers = ['Date', 'Name', 'State(s)/territories', 'AreaBurned(ha)', 'AreaBurned(acres)', 'Fatalities', 'PropertiesDamaged(HomesDestroyed)']

In [9]:
# find objects to scrape
# the table data is after the headers which is in the first two rows

h_table = h_soup.find('table', class_="wikitable")
h_table_body = h_table.find("tbody")
h_table_row = h_table_body.find_all('tr')[2:]

In [10]:
# scrape data and create a list of lists for each row of data

h_data = []

for row in h_table_row:
    
    datarow = []
    
    table_data = row.find_all('td')
    
    for tdata in table_data:
        try:
            value = tdata.text
    #             value.split("\n\n")
            value = value.replace("\n", "")
            datarow.append(value)

        except AttributeError as e:
            print(e)
            
    h_data.append(datarow)
        
# print(h_data)

In [11]:
# convert scrapped data into a dataframe

h_df = pd.DataFrame(h_data)
h_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,6 February 1851,Black Thursday bushfires,Victoria,5000000,12000000,approx. 12,Nil,Nil,1 million sheepthousands of cattle,[7][8]
1,1 February 1898,Red Tuesday bushfires,Victoria,260000,640000,12,Nil,2000,,[8][9][10]
2,February – March 1926,1926 bushfires,Victoria,390000,960000,60,1000,Nil,,[11]
3,December 1938 – January 1939,Black Friday bushfires,Victoria,2000000,4900000,71,3700,Nil,,[5]
4,14 January – 14 February 1944,1944 Victorian bushfires,Victoria,1000000,2500000,15–20,approx. 500,Nil,,[8]
...,...,...,...,...,...,...,...,...,...,...
63,25 November – 2 December 2015,2015 Pinery bushfire,South Australia,85000,210000,2,91,Nil,,[66][67]
64,January 2016,2016 Murray Road bushfire (Waroona and Harvey),Western Australia,69165,170910,2,181,Yarloop Workshops,Thousands of hectares of Lane Poole ReservePro...,[68]
65,11 – 14 February 2017,2017 New South Wales bushfires,New South Wales,52000,130000,Nil,35,Nil,,[69]
66,early February 2019,Tingha bushfire,New South Wales,23419,57870,Nil,19,57,LivestockVineyards,[70][71]


In [12]:
# keep desired rows

h_drop_rows_df = h_df[[0,1,2,3,4,5,6]]
h_drop_rows_df

Unnamed: 0,0,1,2,3,4,5,6
0,6 February 1851,Black Thursday bushfires,Victoria,5000000,12000000,approx. 12,Nil
1,1 February 1898,Red Tuesday bushfires,Victoria,260000,640000,12,Nil
2,February – March 1926,1926 bushfires,Victoria,390000,960000,60,1000
3,December 1938 – January 1939,Black Friday bushfires,Victoria,2000000,4900000,71,3700
4,14 January – 14 February 1944,1944 Victorian bushfires,Victoria,1000000,2500000,15–20,approx. 500
...,...,...,...,...,...,...,...
63,25 November – 2 December 2015,2015 Pinery bushfire,South Australia,85000,210000,2,91
64,January 2016,2016 Murray Road bushfire (Waroona and Harvey),Western Australia,69165,170910,2,181
65,11 – 14 February 2017,2017 New South Wales bushfires,New South Wales,52000,130000,Nil,35
66,early February 2019,Tingha bushfire,New South Wales,23419,57870,Nil,19


In [13]:
# Add column headers

h_drop_rows_df.columns = h_headers
h_drop_rows_df

Unnamed: 0,Date,Name,State(s)/territories,AreaBurned(ha),AreaBurned(acres),Fatalities,PropertiesDamaged(HomesDestroyed)
0,6 February 1851,Black Thursday bushfires,Victoria,5000000,12000000,approx. 12,Nil
1,1 February 1898,Red Tuesday bushfires,Victoria,260000,640000,12,Nil
2,February – March 1926,1926 bushfires,Victoria,390000,960000,60,1000
3,December 1938 – January 1939,Black Friday bushfires,Victoria,2000000,4900000,71,3700
4,14 January – 14 February 1944,1944 Victorian bushfires,Victoria,1000000,2500000,15–20,approx. 500
...,...,...,...,...,...,...,...
63,25 November – 2 December 2015,2015 Pinery bushfire,South Australia,85000,210000,2,91
64,January 2016,2016 Murray Road bushfire (Waroona and Harvey),Western Australia,69165,170910,2,181
65,11 – 14 February 2017,2017 New South Wales bushfires,New South Wales,52000,130000,Nil,35
66,early February 2019,Tingha bushfire,New South Wales,23419,57870,Nil,19


In [14]:
# replace Nil with 0 values

h_nil_df = h_drop_rows_df.replace('Nil', '0')
h_nil_df.head(10)

Unnamed: 0,Date,Name,State(s)/territories,AreaBurned(ha),AreaBurned(acres),Fatalities,PropertiesDamaged(HomesDestroyed)
0,6 February 1851,Black Thursday bushfires,Victoria,5000000,12000000,approx. 12,0
1,1 February 1898,Red Tuesday bushfires,Victoria,260000,640000,12,0
2,February – March 1926,1926 bushfires,Victoria,390000,960000,60,1000
3,December 1938 – January 1939,Black Friday bushfires,Victoria,2000000,4900000,71,3700
4,14 January – 14 February 1944,1944 Victorian bushfires,Victoria,1000000,2500000,15–20,approx. 500
5,18 November 1944,1944 Blue Mountains bushfire,New South Wales,,,0,approx. 40
6,November 1951 – January 1952,1951–52 bushfires,Victoria,4000000,9900000,11,0
7,2 January 1955,Black Sunday bushfires,South Australia,"39,000–160,000","96,000–395,000",2,40[b]
8,30 November 1957,"1957 Grose Valley bushfire, Blue Mountains",New South Wales,,,4,0
9,2 December 1957,"1957 Leura bushfire, Blue Mountains",New South Wales,,,0,170[c]


In [15]:
# remove commas from numbers
# replace unwanted values as NaNs
# replace range data into an average value
# remove citations found within []

h_tonumeric_df = h_nil_df.copy()
h_tonumeric_df['AreaBurned(ha)'] = h_nil_df['AreaBurned(ha)'].str.replace(',','')
h_tonumeric_df['AreaBurned(acres)'] = h_nil_df['AreaBurned(acres)'].str.replace(',','')
h_tonumeric_df['PropertiesDamaged(HomesDestroyed)'] = h_nil_df['PropertiesDamaged(HomesDestroyed)'].str.replace(',','')

columnstoedit = ['AreaBurned(ha)','AreaBurned(acres)','Fatalities','PropertiesDamaged(HomesDestroyed)']

for i in range(len(h_tonumeric_df['Date'])):
    for column in columnstoedit:
        
        if ((h_tonumeric_df[column][i] == '') and (h_tonumeric_df[column][i] != '0')) or (h_tonumeric_df[column][i] == 'unknown') :
            h_tonumeric_df[column][i] = 'NaN'

        if 'approx. ' in str(h_tonumeric_df[column][i]):
            h_tonumeric_df[column][i] = h_tonumeric_df[column][i].replace('approx. ', '')
            
        if 'than ' in str(h_tonumeric_df[column][i]):
            h_tonumeric_df[column][i] = h_tonumeric_df[column][i].split(' ')[-1]

        if len(str(h_tonumeric_df[column][i]).split('–')) == 2:
            splitvalues = str(h_tonumeric_df[column][i]).split('–')
            h_tonumeric_df[column][i] = np.mean([int(splitvalues[0]), int(splitvalues[1])])

        if len(str(h_tonumeric_df[column][i]).split('[')) >= 2:
            h_tonumeric_df[column][i] = str(h_tonumeric_df[column][i]).split('[')[0]
        

h_tonumeric_df

Unnamed: 0,Date,Name,State(s)/territories,AreaBurned(ha),AreaBurned(acres),Fatalities,PropertiesDamaged(HomesDestroyed)
0,6 February 1851,Black Thursday bushfires,Victoria,5000000,12000000,12,0
1,1 February 1898,Red Tuesday bushfires,Victoria,260000,640000,12,0
2,February – March 1926,1926 bushfires,Victoria,390000,960000,60,1000
3,December 1938 – January 1939,Black Friday bushfires,Victoria,2000000,4900000,71,3700
4,14 January – 14 February 1944,1944 Victorian bushfires,Victoria,1000000,2500000,17.5,500
...,...,...,...,...,...,...,...
63,25 November – 2 December 2015,2015 Pinery bushfire,South Australia,85000,210000,2,91
64,January 2016,2016 Murray Road bushfire (Waroona and Harvey),Western Australia,69165,170910,2,181
65,11 – 14 February 2017,2017 New South Wales bushfires,New South Wales,52000,130000,0,35
66,early February 2019,Tingha bushfire,New South Wales,23419,57870,0,19


In [17]:
# pull out the year the fire took place, if over two years, grab the beginning year

h_year_df = h_tonumeric_df.copy()
h_year_df['Year'] = ''
for i in range(len(h_year_df['Date'])):
    
    resultslist = []
    results = str(h_year_df['Date'][i]).split(' ')

    for result in results:
        try:
            value = int(result)
            if len(str(value)) == 4:
                resultslist.append(value)
                year = np.min(resultslist)
                h_year_df['Year'][i] = year

        except:
            year = 'NaN'
            h_year_df['Year'][i] = year
    
h_year_df[:50]


Unnamed: 0,Date,Name,State(s)/territories,AreaBurned(ha),AreaBurned(acres),Fatalities,PropertiesDamaged(HomesDestroyed),Year
0,6 February 1851,Black Thursday bushfires,Victoria,5000000.0,12000000.0,12.0,0.0,1851.0
1,1 February 1898,Red Tuesday bushfires,Victoria,260000.0,640000.0,12.0,0.0,1898.0
2,February – March 1926,1926 bushfires,Victoria,390000.0,960000.0,60.0,1000.0,1926.0
3,December 1938 – January 1939,Black Friday bushfires,Victoria,2000000.0,4900000.0,71.0,3700.0,1938.0
4,14 January – 14 February 1944,1944 Victorian bushfires,Victoria,1000000.0,2500000.0,17.5,500.0,1944.0
5,18 November 1944,1944 Blue Mountains bushfire,New South Wales,,,0.0,40.0,1944.0
6,November 1951 – January 1952,1951–52 bushfires,Victoria,4000000.0,9900000.0,11.0,0.0,1951.0
7,2 January 1955,Black Sunday bushfires,South Australia,99500.0,245500.0,2.0,40.0,1955.0
8,30 November 1957,"1957 Grose Valley bushfire, Blue Mountains",New South Wales,,,4.0,0.0,1957.0
9,2 December 1957,"1957 Leura bushfire, Blue Mountains",New South Wales,,,0.0,170.0,1957.0


In [18]:
# remove nan values and empty values and convert them to integers

h_casting_df = h_year_df.copy()

h_casting_df = h_casting_df[h_casting_df['AreaBurned(ha)'] != 'NaN']
h_casting_df = h_casting_df[h_casting_df['PropertiesDamaged(HomesDestroyed)'] != 'NaN']
h_casting_df = h_casting_df[h_casting_df['PropertiesDamaged(HomesDestroyed)'] != '']
h_casting_df = h_casting_df[h_casting_df['Year'] != 'NaN']
h_casting_df = h_casting_df.astype(
    {
        'AreaBurned(ha)':'int',
        'AreaBurned(acres)': 'int',
        'Fatalities':'int',
        'PropertiesDamaged(HomesDestroyed)':'int',
        'Year': 'int'
    })
# h_casting_df['Year'] = pd.to_datetime(h_casting_df['Year'], format = '%Y')
h_casting_df.dtypes
h_casting_df

Unnamed: 0,Date,Name,State(s)/territories,AreaBurned(ha),AreaBurned(acres),Fatalities,PropertiesDamaged(HomesDestroyed),Year
0,6 February 1851,Black Thursday bushfires,Victoria,5000000,12000000,12,0,1851
1,1 February 1898,Red Tuesday bushfires,Victoria,260000,640000,12,0,1898
2,February – March 1926,1926 bushfires,Victoria,390000,960000,60,1000,1926
3,December 1938 – January 1939,Black Friday bushfires,Victoria,2000000,4900000,71,3700,1938
4,14 January – 14 February 1944,1944 Victorian bushfires,Victoria,1000000,2500000,17,500,1944
6,November 1951 – January 1952,1951–52 bushfires,Victoria,4000000,9900000,11,0,1951
7,2 January 1955,Black Sunday bushfires,South Australia,99500,245500,2,40,1955
10,January – March 1961,1961 Western Australian bushfires,Western Australia,1800000,4400000,0,160,1961
12,16 February – 13 March 1965,1965 Gippsland bushfires,Victoria,315000,780000,0,20,1965
13,5 – 14 March 1965,Southern Highlands bushfires,New South Wales,251000,620000,3,59,1965


In [19]:
# transform the dataframe into a dictionary so that we can feed it to MongoDB
h_dict = h_casting_df.to_dict('records')
h_dict

[{'Date': '6 February 1851',
  'Name': 'Black Thursday bushfires',
  'State(s)/territories': 'Victoria',
  'AreaBurned(ha)': 5000000,
  'AreaBurned(acres)': 12000000,
  'Fatalities': 12,
  'PropertiesDamaged(HomesDestroyed)': 0,
  'Year': 1851},
 {'Date': '1 February 1898',
  'Name': 'Red Tuesday bushfires',
  'State(s)/territories': 'Victoria',
  'AreaBurned(ha)': 260000,
  'AreaBurned(acres)': 640000,
  'Fatalities': 12,
  'PropertiesDamaged(HomesDestroyed)': 0,
  'Year': 1898},
 {'Date': 'February – March 1926',
  'Name': '1926 bushfires',
  'State(s)/territories': 'Victoria',
  'AreaBurned(ha)': 390000,
  'AreaBurned(acres)': 960000,
  'Fatalities': 60,
  'PropertiesDamaged(HomesDestroyed)': 1000,
  'Year': 1926},
 {'Date': 'December 1938 – January 1939',
  'Name': 'Black Friday bushfires',
  'State(s)/territories': 'Victoria',
  'AreaBurned(ha)': 2000000,
  'AreaBurned(acres)': 4900000,
  'Fatalities': 71,
  'PropertiesDamaged(HomesDestroyed)': 3700,
  'Year': 1938},
 {'Date': '14

In [20]:
# insert records into the MongoDB collection histroicalFires

if (historicalFires.count() == 0):
    historicalFires.insert(h_dict)
    
else:
    print("Data already exists")

  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.


# Scrapping Data from 2019-2020 by State

In [21]:
bs_table = bs_soup.find('table', class_='sortable')
print(bs_table.prettify())

<table class="wikitable sortable plainrowheaders">
 <tbody>
  <tr>
   <th rowspan="2" scope="col" style="font-weight:bold;">
    State / territory
   </th>
   <th rowspan="2" scope="col" style="font-weight:bold;">
    Fatalities
   </th>
   <th rowspan="2" scope="col" style="font-weight:bold;">
    Homes
    <br/>
    lost
   </th>
   <th colspan="2" scope="col" style="font-weight:bold;">
    Area
    <br/>
    <span style="font-size:85%;">
     (estimated)
    </span>
   </th>
   <th rowspan="2" scope="col" style="font-weight:bold;">
    Notes
   </th>
  </tr>
  <tr>
   <th>
    <a href="/wiki/Hectare" title="Hectare">
     ha
    </a>
   </th>
   <th>
    <a href="/wiki/Acre" title="Acre">
     acres
    </a>
   </th>
  </tr>
  <tr>
   <th scope="row">
    Australian Capital Territory
   </th>
   <td align="right">
    1
   </td>
   <td align="right">
    0
   </td>
   <td style="text-align:right;">
    56,688
   </td>
   <td style="text-align:right;">
    140,080
   </td>
   <td>
  

In [22]:
bs_theaders = bs_soup.find('table', class_='sortable').find_all('tr')[0:2]


bs_headers = []
for table_header in bs_theaders:
    try:
        value = table_header.text
        value = value.split("\n\n")
        bs_headers.append(value)
        
    except AttributeError as e:
        print(e)
        
print(bs_headers)
print(len(bs_headers))

[['\nState / territory', 'Fatalities', 'Homeslost', 'Area(estimated)', 'Notes\n'], ['\nha\nacres\n']]
2


In [23]:
bs_theaders = ['State/Territory', 'Fatalities', 'Homeslost', 'Area(estimated)(ha)', 'Area(estimated)(acres)', 'Notes']

In [24]:
# scrape data and create a list of lists for each row of data

bs_table_row = bs_soup.find('table', class_='sortable').find_all('tr')[2:]

bs_data = []

for row in bs_table_row:
    
    datarow = []
    
    table_header = row.find('th').text
    table_header = table_header.replace('\n','')
    datarow.append(table_header)
    table_data = row.find_all('td')
    
    for tdata in table_data:
        try:
            value = tdata.text
    #             value.split("\n\n")
            value = value.replace("\n", "")
            datarow.append(value)

        except AttributeError as e:
            print(e)
            
    bs_data.append(datarow)
        
print(bs_data)

[['Australian Capital Territory', '1', '0', '56,688', '140,080', 'Area;[91][92][93] fatality[c][95]'], ['New South Wales', '25', '2,439', '5,400,000', '13,300,000', 'Area;[96] fatalities;[18][37] homes[96]'], ['Northern Territory', '0', '5', '6,800,000', '16,800,000', 'Area, includes mainly scrub fires, which are within the normal range of area burnt by bushfires each year;[49] homes[97]'], ['Queensland', '0', '48', '2,500,000', '6,180,000', 'Area, includes scrub fires;[49] homes[97][d]'], ['South Australia', '3', '151', '490,000', '1,210,000', 'Area;[49] fatalities;[102] homes (KI:65)[103] (AH:86)[104]'], ['Tasmania', '0', '2', '36,000', '89,000', 'Area;[49] homes[97]'], ['Victoria', '5', '396', '1,500,000', '3,710,000', 'Area;[49] fatalities;[15] homes[105]'], ['Western Australia', '0', '1', '2,200,000', '5,440,000', 'Area, includes scrub fires;[49] homes[97]'], ['Total', '34', '3,500+', '18,736,070', '46,300,000', '[e][b][109][110] Total area estimate as of 13 February 2020; current

In [25]:
# convert scrapped data into a dataframe

bs_df = pd.DataFrame(bs_data)
bs_df

Unnamed: 0,0,1,2,3,4,5
0,Australian Capital Territory,1,0,56688,140080,Area;[91][92][93] fatality[c][95]
1,New South Wales,25,2439,5400000,13300000,Area;[96] fatalities;[18][37] homes[96]
2,Northern Territory,0,5,6800000,16800000,"Area, includes mainly scrub fires, which are w..."
3,Queensland,0,48,2500000,6180000,"Area, includes scrub fires;[49] homes[97][d]"
4,South Australia,3,151,490000,1210000,Area;[49] fatalities;[102] homes (KI:65)[103] ...
5,Tasmania,0,2,36000,89000,Area;[49] homes[97]
6,Victoria,5,396,1500000,3710000,Area;[49] fatalities;[15] homes[105]
7,Western Australia,0,1,2200000,5440000,"Area, includes scrub fires;[49] homes[97]"
8,Total,34,"3,500+",18736070,46300000,[e][b][109][110] Total area estimate as of 13 ...


In [26]:
# Add column headers

bs_df.columns = bs_theaders
bs_df

Unnamed: 0,State/Territory,Fatalities,Homeslost,Area(estimated)(ha),Area(estimated)(acres),Notes
0,Australian Capital Territory,1,0,56688,140080,Area;[91][92][93] fatality[c][95]
1,New South Wales,25,2439,5400000,13300000,Area;[96] fatalities;[18][37] homes[96]
2,Northern Territory,0,5,6800000,16800000,"Area, includes mainly scrub fires, which are w..."
3,Queensland,0,48,2500000,6180000,"Area, includes scrub fires;[49] homes[97][d]"
4,South Australia,3,151,490000,1210000,Area;[49] fatalities;[102] homes (KI:65)[103] ...
5,Tasmania,0,2,36000,89000,Area;[49] homes[97]
6,Victoria,5,396,1500000,3710000,Area;[49] fatalities;[15] homes[105]
7,Western Australia,0,1,2200000,5440000,"Area, includes scrub fires;[49] homes[97]"
8,Total,34,"3,500+",18736070,46300000,[e][b][109][110] Total area estimate as of 13 ...


In [29]:
bs_dropchar_df = bs_df.drop('Notes', axis=1)

columnstoedit = ['Homeslost', 'Area(estimated)(ha)', 'Area(estimated)(acres)']

for column in columnstoedit:
    for i in range(len(bs_dropchar_df[column])):
        try:
            bs_dropchar_df[column][i] = str(bs_dropchar_df[column][i]).replace(',','')
            bs_dropchar_df[column][i] = str(bs_dropchar_df[column][i]).replace('+', '')
        except:
            pass

bs_dropchar_df

Unnamed: 0,State/Territory,Fatalities,Homeslost,Area(estimated)(ha),Area(estimated)(acres)
0,Australian Capital Territory,1,0,56688,140080
1,New South Wales,25,2439,5400000,13300000
2,Northern Territory,0,5,6800000,16800000
3,Queensland,0,48,2500000,6180000
4,South Australia,3,151,490000,1210000
5,Tasmania,0,2,36000,89000
6,Victoria,5,396,1500000,3710000
7,Western Australia,0,1,2200000,5440000
8,Total,34,3500,18736070,46300000


In [38]:
bs_cast_df = bs_dropchar_df.copy()

bs_cast_df = bs_cast_df.astype({
    'Fatalities': 'int',
    'Homeslost': 'int',
    'Area(estimated)(ha)': 'int',
    'Area(estimated)(acres)': 'int'
})

bs_cast_df.dtypes

State/Territory           object
Fatalities                 int64
Homeslost                  int64
Area(estimated)(ha)        int64
Area(estimated)(acres)     int64
dtype: object

In [39]:
bs_dict = bs_cast_df.to_dict()
bs_dict

{'State/Territory': {0: 'Australian Capital Territory',
  1: 'New South Wales',
  2: 'Northern Territory',
  3: 'Queensland',
  4: 'South Australia',
  5: 'Tasmania',
  6: 'Victoria',
  7: 'Western Australia',
  8: 'Total'},
 'Fatalities': {0: 1, 1: 25, 2: 0, 3: 0, 4: 3, 5: 0, 6: 5, 7: 0, 8: 34},
 'Homeslost': {0: 0,
  1: 2439,
  2: 5,
  3: 48,
  4: 151,
  5: 2,
  6: 396,
  7: 1,
  8: 3500},
 'Area(estimated)(ha)': {0: 56688,
  1: 5400000,
  2: 6800000,
  3: 2500000,
  4: 490000,
  5: 36000,
  6: 1500000,
  7: 2200000,
  8: 18736070},
 'Area(estimated)(acres)': {0: 140080,
  1: 13300000,
  2: 16800000,
  3: 6180000,
  4: 1210000,
  5: 89000,
  6: 3710000,
  7: 5440000,
  8: 46300000}}

In [None]:
if (bushfiresbyState.count() == 0):
    historicalFires.insert(bs_dict)
    
else:
    print("Data already exists")