# Extract table from PDF report

In [None]:
# comment out this cell if tabula has not been installed
# !pip install tabula

In [1]:
# import the require package
import tabula
# check the version
tabula.environment_info()

Python version:
    3.7.4 (default, Aug 13 2019, 15:17:50) 
[Clang 4.0.1 (tags/RELEASE_401/final)]
Java version:
    java version "1.8.0_201"
Java(TM) SE Runtime Environment (build 1.8.0_201-b09)
Java HotSpot(TM) 64-Bit Server VM (build 25.201-b09, mixed mode)
tabula-py version: 2.1.1
platform: Darwin-19.4.0-x86_64-i386-64bit
uname:
    uname_result(system='Darwin', node='JasperLiangs-MacBook-Pro.local', release='19.4.0', version='Darwin Kernel Version 19.4.0: Wed Mar  4 22:28:40 PST 2020; root:xnu-6153.101.6~15/RELEASE_X86_64', machine='x86_64', processor='i386')
linux_distribution: ('Darwin', '19.4.0', '')
mac_ver: ('10.15.4', ('', '', ''), 'x86_64')
    


In [2]:
import tabula
# pdf_path = "https://github.com/chezou/tabula-py/raw/master/tests/resources/data.pdf"

dfs = tabula.read_pdf('plants list.pdf', stream=True, pages = [9,10],multiple_tables=False)

Got stderr: Sep 16, 2020 5:18:28 AM org.apache.pdfbox.rendering.CIDType0Glyph2D getPathForCharacterCode
Sep 16, 2020 5:18:28 AM org.apache.pdfbox.rendering.CIDType0Glyph2D getPathForCharacterCode



In [3]:
# get the table from pdf
initial_table = dfs[0]
initial_table.head(10)

Unnamed: 0,Insects,Synemon plana,golden sun moth,A day flying moth about 3cm,Critically
0,,,,wingspan. Formerly widespread,Endangered
1,,,,"through New South Wales (NSW),",
2,,,,"Australian Capital Territory (ACT), Vic",
3,,,,"and South Australia (SA), now has",
4,,,,a highly reduced and fragmented,
5,,,,distribution and considered extinct,
6,,,,in SA. Inhabits natural temperate,
7,,,,grassland and grassy woodlands.,
8,Plants,Carex tasmanica,curly sedge,From Vic and Tasmania (Tas) only.,Vulnerable
9,,,,Usually associated with drainage,


In [4]:
# exclude the introduction part
initial_table.drop(range(8),inplace=True)
initial_table

Unnamed: 0,Insects,Synemon plana,golden sun moth,A day flying moth about 3cm,Critically
8,Plants,Carex tasmanica,curly sedge,From Vic and Tasmania (Tas) only.,Vulnerable
9,,,,Usually associated with drainage,
10,,,,lines or marshes.,
11,,Cullen parvum,small scurf-pea,Only occurs in Vic and SA in,Endangered
12,,,,grassland and grassy woodland sites,
...,...,...,...,...,...
94,,,,spring and summer,
95,,Xerochrysum palustre,swamp everlasting,Occurs in southern Vic and a few,Vulnerable
96,,,,"sites in northern Tas. Perennial herb,",
97,,,,flowering November to March dying,


In [5]:
# rename to columns for name convinient
initial_table.columns=['SPECIES','SCIENTIFIC_NAME','COMMON_NAME','NOTES','EPBC_ACT_STATUS']
initial_table['SPECIES'] = 'Flora'

In [6]:
# reset the index and replace the missing value inside
initial_table = initial_table.reset_index(drop=True)
initial_table = initial_table.fillna("")
initial_table

Unnamed: 0,SPECIES,SCIENTIFIC_NAME,COMMON_NAME,NOTES,EPBC_ACT_STATUS
0,Flora,Carex tasmanica,curly sedge,From Vic and Tasmania (Tas) only.,Vulnerable
1,Flora,,,Usually associated with drainage,
2,Flora,,,lines or marshes.,
3,Flora,Cullen parvum,small scurf-pea,Only occurs in Vic and SA in,Endangered
4,Flora,,,grassland and grassy woodland sites,
...,...,...,...,...,...
86,Flora,,,spring and summer,
87,Flora,Xerochrysum palustre,swamp everlasting,Occurs in southern Vic and a few,Vulnerable
88,Flora,,,"sites in northern Tas. Perennial herb,",
89,Flora,,,flowering November to March dying,


In [8]:
# check the format of the non-numeric column(s) 
initial_table['NOTES'].tolist()

['From Vic and Tasmania (Tas) only.',
 'Usually associated with drainage',
 'lines or marshes.',
 'Only occurs in Vic and SA in',
 'grassland and grassy woodland sites',
 'which get irregular flooding. Flowers',
 'between October and February. Dies',
 'back, so not visible, in winter.',
 'Summer flowering large herb up',
 'to 5m wide with flower spikes to',
 '90cm. Current distribution restricted',
 'and fragmented, some populations',
 'occurring in urban areas.',
 'Known only from Vic on basalt',
 'plains north and west of Melbourne.',
 'Flowering in September and',
 'October.',
 'Only one known remaining',
 'population to the north of',
 'Melbourne. Flowering mid-October',
 'to early November. Pollination reliant',
 'on native bees.',
 'Known from southern NSW, Vic and',
 'SA. Flowering November to February',
 'and fruits may take up to 12 months',
 'to mature.',
 'Occurs in Tas, a single location in',
 'SA and is spread throughout Vic but',
 'only six known viable populations.',
 'G

In [9]:
# combine all the seperate lines in one text
for i in range(len(initial_table)-1,0,-1):
    if initial_table.loc[i,'EPBC_ACT_STATUS'] == "":
        # scientific name
        s_name = initial_table.loc[i,'SCIENTIFIC_NAME']
        pre_s_name = initial_table.loc[i-1,'SCIENTIFIC_NAME']
        initial_table.loc[i-1,'SCIENTIFIC_NAME'] = pre_s_name + " " + s_name
        # common name
        c_name = initial_table.loc[i,'COMMON_NAME']
        pre_c_name = initial_table.loc[i-1,'COMMON_NAME']
        initial_table.loc[i-1,'COMMON_NAME'] = pre_c_name + " " + c_name
        # text part
        text = initial_table.loc[i,'NOTES']
        pre_text = initial_table.loc[i-1,'NOTES']
        initial_table.loc[i-1,'NOTES'] = pre_text + " " + text

In [10]:
# check the redundant spaces
initial_table.loc[88,"SCIENTIFIC_NAME"] in [""," ","  ","   "]

True

In [11]:
# remove all the redundant spaces and invalid rows
flora = initial_table.drop(initial_table[initial_table.EPBC_ACT_STATUS.isin([""," ","  ","   "])].index)
flora = flora.reset_index(drop=True)
flora

Unnamed: 0,SPECIES,SCIENTIFIC_NAME,COMMON_NAME,NOTES,EPBC_ACT_STATUS
0,Flora,Carex tasmanica,curly sedge,From Vic and Tasmania (Tas) only. Usually asso...,Vulnerable
1,Flora,Cullen parvum,small scurf-pea,Only occurs in Vic and SA in grassland and gra...,Endangered
2,Flora,Dianella amoena,matted flax-lily,Summer flowering large herb up to 5m wide with...,Endangered
3,Flora,Diuris sp. aff. chryseopsis (Basalt Plains),"small golden moths, early golden moths",Known only from Vic on basalt plains north and...,Endangered
4,Flora,Diuris fragrantissima,"sunshine diuris, white diuris, fragrant double...",Only one known remaining population to the nor...,Endangered
5,Flora,Dodonaea procumbens,trailing hop-bush,"Known from southern NSW, Vic and SA. Flowering...",Vulnerable
6,Flora,Glycine latrobeana,"clover glycine, purple clover","Occurs in Tas, a single location in SA and is ...",Vulnerable
7,Flora,Lachnagrostis adamsonii,adamson’s blown- grass,Endemic to south-central and south- western Vi...,Endangered
8,Flora,Lepidium aschersonii,spiny pepper-cress,Thought to be extinct in NSW. Occurs in the so...,Vulnerable
9,Flora,Lepidium hyssopifolium,basalt pepper-cress,Vic distribution from only three areas in cent...,Endangered


In [12]:
# fix the format errors in the name columns
cd_sname = flora.loc[11,'SCIENTIFIC_NAME']
cd_cname = flora.loc[11,'COMMON_NAME']
cd_note = flora.loc[11,'NOTES']
cd_status = flora.loc[11,'EPBC_ACT_STATUS']
flora.loc[11,'SCIENTIFIC_NAME'] = cd_sname + " " + flora.loc[12,'SCIENTIFIC_NAME']
flora.loc[11,'COMMON_NAME'] = cd_cname + " " + flora.loc[12,'COMMON_NAME']
flora.loc[11,'NOTES'] = cd_note + " " + flora.loc[12,'NOTES']
flora.loc[11,'EPBC_ACT_STATUS'] = cd_status + " " + flora.loc[12,'EPBC_ACT_STATUS']
flora = flora.drop(12)
flora = flora.reset_index(drop=True)
flora

Unnamed: 0,SPECIES,SCIENTIFIC_NAME,COMMON_NAME,NOTES,EPBC_ACT_STATUS
0,Flora,Carex tasmanica,curly sedge,From Vic and Tasmania (Tas) only. Usually asso...,Vulnerable
1,Flora,Cullen parvum,small scurf-pea,Only occurs in Vic and SA in grassland and gra...,Endangered
2,Flora,Dianella amoena,matted flax-lily,Summer flowering large herb up to 5m wide with...,Endangered
3,Flora,Diuris sp. aff. chryseopsis (Basalt Plains),"small golden moths, early golden moths",Known only from Vic on basalt plains north and...,Endangered
4,Flora,Diuris fragrantissima,"sunshine diuris, white diuris, fragrant double...",Only one known remaining population to the nor...,Endangered
5,Flora,Dodonaea procumbens,trailing hop-bush,"Known from southern NSW, Vic and SA. Flowering...",Vulnerable
6,Flora,Glycine latrobeana,"clover glycine, purple clover","Occurs in Tas, a single location in SA and is ...",Vulnerable
7,Flora,Lachnagrostis adamsonii,adamson’s blown- grass,Endemic to south-central and south- western Vi...,Endangered
8,Flora,Lepidium aschersonii,spiny pepper-cress,Thought to be extinct in NSW. Occurs in the so...,Vulnerable
9,Flora,Lepidium hyssopifolium,basalt pepper-cress,Vic distribution from only three areas in cent...,Endangered


In [13]:
# make the name format consist
flora.loc[3,'COMMON_NAME'] = 'clumping golden moths'
flora['COMMON_NAME'] = flora['COMMON_NAME'].str.title()
flora

Unnamed: 0,SPECIES,SCIENTIFIC_NAME,COMMON_NAME,NOTES,EPBC_ACT_STATUS
0,Flora,Carex tasmanica,Curly Sedge,From Vic and Tasmania (Tas) only. Usually asso...,Vulnerable
1,Flora,Cullen parvum,Small Scurf-Pea,Only occurs in Vic and SA in grassland and gra...,Endangered
2,Flora,Dianella amoena,Matted Flax-Lily,Summer flowering large herb up to 5m wide with...,Endangered
3,Flora,Diuris sp. aff. chryseopsis (Basalt Plains),Clumping Golden Moths,Known only from Vic on basalt plains north and...,Endangered
4,Flora,Diuris fragrantissima,"Sunshine Diuris, White Diuris, Fragrant Double...",Only one known remaining population to the nor...,Endangered
5,Flora,Dodonaea procumbens,Trailing Hop-Bush,"Known from southern NSW, Vic and SA. Flowering...",Vulnerable
6,Flora,Glycine latrobeana,"Clover Glycine, Purple Clover","Occurs in Tas, a single location in SA and is ...",Vulnerable
7,Flora,Lachnagrostis adamsonii,Adamson’S Blown- Grass,Endemic to south-central and south- western Vi...,Endangered
8,Flora,Lepidium aschersonii,Spiny Pepper-Cress,Thought to be extinct in NSW. Occurs in the so...,Vulnerable
9,Flora,Lepidium hyssopifolium,Basalt Pepper-Cress,Vic distribution from only three areas in cent...,Endangered


# Data cleaning

In [14]:
# select the index of flowering plants
f_count = 0
flower = []
for i in flora.index:
    if "flowering" in flora.loc[i,'NOTES'].lower().split() or "flowers" in flora.loc[i,'NOTES'].lower().split():
        f_count+=1
        flower.append(i)
print(f_count)
print(flower)

17
[1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20]


In [15]:
# select the index of endangered or critically endangered plants
v_count = 0
endangered = []
for j in flora.index:
    if flora.loc[j,'EPBC_ACT_STATUS']!="Vulnerable":
        v_count+=1
        endangered.append(j)
print(v_count)
print(endangered)

13
[1, 2, 3, 4, 7, 9, 10, 11, 12, 13, 14, 15, 16]


In [16]:
# extract the endangered plants inside the table
target_index = list(set(endangered).intersection(flower))
target_index

[1, 2, 3, 4, 9, 10, 11, 12, 13, 14, 15, 16]

In [17]:
flower_index = flora.index.isin(target_index)
flora.drop(flora[~flower_index].index,inplace=True)
flora = flora.reset_index(drop=True)
flora

Unnamed: 0,SPECIES,SCIENTIFIC_NAME,COMMON_NAME,NOTES,EPBC_ACT_STATUS
0,Flora,Cullen parvum,Small Scurf-Pea,Only occurs in Vic and SA in grassland and gra...,Endangered
1,Flora,Dianella amoena,Matted Flax-Lily,Summer flowering large herb up to 5m wide with...,Endangered
2,Flora,Diuris sp. aff. chryseopsis (Basalt Plains),Clumping Golden Moths,Known only from Vic on basalt plains north and...,Endangered
3,Flora,Diuris fragrantissima,"Sunshine Diuris, White Diuris, Fragrant Double...",Only one known remaining population to the nor...,Endangered
4,Flora,Lepidium hyssopifolium,Basalt Pepper-Cress,Vic distribution from only three areas in cent...,Endangered
5,Flora,Leucochrysum albicans var. tricolor,Hoary Sunray,"Known from NSW, ACT, Vic and Tas. Distribution...",Endangered
6,Flora,Pimelea spinescens subsp. spinescens,"Plains Rice-Flower, Spiny Rice-Flower, Prickly...",Endemic to south-western and central Vic. Stun...,Critically Endangered
7,Flora,Prasophyllum diversiflorum,Gorae Leek-Orchid,"Endemic to south-western Vic, known from only ...",Endangered
8,Flora,Prasophyllum frenchii,"Maroon Leek-Orchid, Slaty Leek-Orchid, Stout L...",Distributed in Vic and far south- eastern SA. ...,Endangered
9,Flora,Prasophyllum suaveolens,Fragrant Leek-Orchid,Known only from western Vic. Slender orchid to...,Endangered


# Data wrangling

In [18]:
import re

In [51]:
# extract the flowering period mentioned in the descriptions
flora["FLOWERING_PERIOD"] = ""
flora["HABITAT"] = "Tussock Plains - lowland tussock grassland"
for i in flora.index:
    note = flora.loc[i,"NOTES"].lower()
    # add flowering period
    period = re.search(r'(flower[\w\W]*?)(?:[\.\,])', note).group(1)
    flora.loc[i,"FLOWERING_PERIOD"] = period.capitalize()
    # add habitat
    woodland = re.findall(r'woodland.*?', note)
    if len(woodland) >= 1:
        flora.loc[i,"HABITAT"] = "Woodland - Grassy Eucalypt Woodland"
    elif "basalt" in flora.loc[i,"COMMON_NAME"].lower().split() or "basalt" in flora.loc[i,"NOTES"].lower().split():
        flora.loc[i,"HABITAT"] = "Rocky areas - Western (Basalt) Plains"
flora

1
2
2
2


Unnamed: 0,SPECIES,SCIENTIFIC_NAME,COMMON_NAME,NOTES,EPBC_ACT_STATUS,FLOWERING_PERIOD,HABITAT
0,Flora,Cullen parvum,Small Scurf-Pea,Only occurs in Vic and SA in grassland and gra...,Endangered,Flowers between october and february,Woodland - Grassy Eucalypt Woodland
1,Flora,Dianella amoena,Matted Flax-Lily,Summer flowering large herb up to 5m wide with...,Endangered,Flowering large herb up to 5m wide with flower...,Tussock Plains - lowland tussock grassland
2,Flora,Diuris sp. aff. chryseopsis (Basalt Plains),Clumping Golden Moths,Known only from Vic on basalt plains north and...,Endangered,Flowering in september and october,Rocky areas - Western (Basalt) Plains
3,Flora,Diuris fragrantissima,"Sunshine Diuris, White Diuris, Fragrant Double...",Only one known remaining population to the nor...,Endangered,Flowering mid-october to early november,Tussock Plains - lowland tussock grassland
4,Flora,Lepidium hyssopifolium,Basalt Pepper-Cress,Vic distribution from only three areas in cent...,Endangered,Flowering from december to february,Rocky areas - Western (Basalt) Plains
5,Flora,Leucochrysum albicans var. tricolor,Hoary Sunray,"Known from NSW, ACT, Vic and Tas. Distribution...",Endangered,Flowers in spring to summer,Tussock Plains - lowland tussock grassland
6,Flora,Pimelea spinescens subsp. spinescens,"Plains Rice-Flower, Spiny Rice-Flower, Prickly...",Endemic to south-western and central Vic. Stun...,Critically Endangered,Flowers in april to august,Tussock Plains - lowland tussock grassland
7,Flora,Prasophyllum diversiflorum,Gorae Leek-Orchid,"Endemic to south-western Vic, known from only ...",Endangered,Flowering december to february,Tussock Plains - lowland tussock grassland
8,Flora,Prasophyllum frenchii,"Maroon Leek-Orchid, Slaty Leek-Orchid, Stout L...",Distributed in Vic and far south- eastern SA. ...,Endangered,Flowering october to december,Tussock Plains - lowland tussock grassland
9,Flora,Prasophyllum suaveolens,Fragrant Leek-Orchid,Known only from western Vic. Slender orchid to...,Endangered,Flowering october and november,Tussock Plains - lowland tussock grassland


In [52]:
# insert the habitat columns
flora.loc[1,"HABITAT"] = "Woodland - Grassy Eucalypt Woodland"
flora.loc[3,"HABITAT"] = "Rocky areas - Western (Basalt) Plains"
flora.loc[7,"HABITAT"] = "Wetland - seasonal herbaceous wetlands"
flora.loc[8,"HABITAT"] = "Wetland - seasonal herbaceous wetlands"
flora.loc[9,"HABITAT"] = "Woodland - Grassy Eucalypt Woodland"
flora.loc[11,"HABITAT"] = "Rocky areas - Western (Basalt) Plains"
flora

Unnamed: 0,SPECIES,SCIENTIFIC_NAME,COMMON_NAME,NOTES,EPBC_ACT_STATUS,FLOWERING_PERIOD,HABITAT
0,Flora,Cullen parvum,Small Scurf-Pea,Only occurs in Vic and SA in grassland and gra...,Endangered,Flowers between october and february,Woodland - Grassy Eucalypt Woodland
1,Flora,Dianella amoena,Matted Flax-Lily,Summer flowering large herb up to 5m wide with...,Endangered,Flowering large herb up to 5m wide with flower...,Woodland - Grassy Eucalypt Woodland
2,Flora,Diuris sp. aff. chryseopsis (Basalt Plains),Clumping Golden Moths,Known only from Vic on basalt plains north and...,Endangered,Flowering in september and october,Rocky areas - Western (Basalt) Plains
3,Flora,Diuris fragrantissima,"Sunshine Diuris, White Diuris, Fragrant Double...",Only one known remaining population to the nor...,Endangered,Flowering mid-october to early november,Rocky areas - Western (Basalt) Plains
4,Flora,Lepidium hyssopifolium,Basalt Pepper-Cress,Vic distribution from only three areas in cent...,Endangered,Flowering from december to february,Rocky areas - Western (Basalt) Plains
5,Flora,Leucochrysum albicans var. tricolor,Hoary Sunray,"Known from NSW, ACT, Vic and Tas. Distribution...",Endangered,Flowers in spring to summer,Tussock Plains - lowland tussock grassland
6,Flora,Pimelea spinescens subsp. spinescens,"Plains Rice-Flower, Spiny Rice-Flower, Prickly...",Endemic to south-western and central Vic. Stun...,Critically Endangered,Flowers in april to august,Tussock Plains - lowland tussock grassland
7,Flora,Prasophyllum diversiflorum,Gorae Leek-Orchid,"Endemic to south-western Vic, known from only ...",Endangered,Flowering december to february,Wetland - seasonal herbaceous wetlands
8,Flora,Prasophyllum frenchii,"Maroon Leek-Orchid, Slaty Leek-Orchid, Stout L...",Distributed in Vic and far south- eastern SA. ...,Endangered,Flowering october to december,Wetland - seasonal herbaceous wetlands
9,Flora,Prasophyllum suaveolens,Fragrant Leek-Orchid,Known only from western Vic. Slender orchid to...,Endangered,Flowering october and november,Woodland - Grassy Eucalypt Woodland


In [54]:
# create the flowering period column
flora.loc[0,'FLOWERING_PERIOD'] = "From October to February"
flora.loc[1,'FLOWERING_PERIOD'] = "From December to February"
flora.loc[2,'FLOWERING_PERIOD'] = "From September to October"
flora.loc[3,'FLOWERING_PERIOD'] = "From October to November"
flora.loc[4,'FLOWERING_PERIOD'] = "From December to February"
flora.loc[5,'FLOWERING_PERIOD'] = "From September to February"
flora.loc[6,'FLOWERING_PERIOD'] = "From April to August"
flora.loc[7,'FLOWERING_PERIOD'] = "From December to February"
flora.loc[8,'FLOWERING_PERIOD'] = "From October to December"
flora.loc[9,'FLOWERING_PERIOD'] = "From October to November"
flora.loc[10,'FLOWERING_PERIOD'] = "From November to January"
flora.loc[11,'FLOWERING_PERIOD'] = "From October to January"
flora

Unnamed: 0,SPECIES,SCIENTIFIC_NAME,COMMON_NAME,NOTES,EPBC_ACT_STATUS,FLOWERING_PERIOD,HABITAT
0,Flora,Cullen parvum,Small Scurf-Pea,Only occurs in Vic and SA in grassland and gra...,Endangered,From October to February,Woodland - Grassy Eucalypt Woodland
1,Flora,Dianella amoena,Matted Flax-Lily,Summer flowering large herb up to 5m wide with...,Endangered,From December to February,Woodland - Grassy Eucalypt Woodland
2,Flora,Diuris sp. aff. chryseopsis (Basalt Plains),Clumping Golden Moths,Known only from Vic on basalt plains north and...,Endangered,From September to October,Rocky areas - Western (Basalt) Plains
3,Flora,Diuris fragrantissima,"Sunshine Diuris, White Diuris, Fragrant Double...",Only one known remaining population to the nor...,Endangered,From October to November,Rocky areas - Western (Basalt) Plains
4,Flora,Lepidium hyssopifolium,Basalt Pepper-Cress,Vic distribution from only three areas in cent...,Endangered,From December to February,Rocky areas - Western (Basalt) Plains
5,Flora,Leucochrysum albicans var. tricolor,Hoary Sunray,"Known from NSW, ACT, Vic and Tas. Distribution...",Endangered,From September to February,Tussock Plains - lowland tussock grassland
6,Flora,Pimelea spinescens subsp. spinescens,"Plains Rice-Flower, Spiny Rice-Flower, Prickly...",Endemic to south-western and central Vic. Stun...,Critically Endangered,From April to August,Tussock Plains - lowland tussock grassland
7,Flora,Prasophyllum diversiflorum,Gorae Leek-Orchid,"Endemic to south-western Vic, known from only ...",Endangered,From December to February,Wetland - seasonal herbaceous wetlands
8,Flora,Prasophyllum frenchii,"Maroon Leek-Orchid, Slaty Leek-Orchid, Stout L...",Distributed in Vic and far south- eastern SA. ...,Endangered,From October to December,Wetland - seasonal herbaceous wetlands
9,Flora,Prasophyllum suaveolens,Fragrant Leek-Orchid,Known only from western Vic. Slender orchid to...,Endangered,From October to November,Woodland - Grassy Eucalypt Woodland


In [55]:
flora.columns.tolist()

['SPECIES',
 'SCIENTIFIC_NAME',
 'COMMON_NAME',
 'NOTES',
 'EPBC_ACT_STATUS',
 'FLOWERING_PERIOD',
 'HABITAT']

In [57]:
# formatting the table
new_row1 = {'SPECIES':'Flora', 'SCIENTIFIC_NAME':'Senecio pterophorus', \
           'COMMON_NAME':'African Daisy', 'NOTES':"African daisy is a strong competitor in natural environments and forms dense thickets that exclude native plant species, resulting in a reduction in habitat.", \
          'EPBC_ACT_STATUS':"Secured", 'FLOWERING_PERIOD': "From November to March", \
          'HABITAT': "Woodland - Grassy Eucalypt Woodland"}
flora = flora.append(new_row1, ignore_index=True)
new_row2 = {'SPECIES':'Flora', 'SCIENTIFIC_NAME':'Pennisetum macrourum Trin', \
           'COMMON_NAME':'African feather grass', 'NOTES':"African feather grass is an aggressive invader and often competes and displaces riparian vegetation.", \
          'EPBC_ACT_STATUS':"Secured", 'FLOWERING_PERIOD': "From November to February", \
          'HABITAT': "Wetland - seasonal herbaceous wetlands"}
flora = flora.append(new_row2, ignore_index=True)
new_row3 = {'SPECIES':'Flora', 'SCIENTIFIC_NAME':'Rubus fruticosus', \
           'COMMON_NAME':'Blackberry', 'NOTES':"Blackberry is considered a serious environmental threat and is highly invasive. It can provide harbour and a food source for pest species, as well as serve to outcompete and eliminate other vegetation by excluding light from the soil surface.", \
          'EPBC_ACT_STATUS':"Secured", 'FLOWERING_PERIOD': "From November to January", \
          'HABITAT': "Woodland - Grassy Eucalypt Woodland"}
flora = flora.append(new_row3, ignore_index=True)

new_row4 = {'SPECIES':'Flora', 'SCIENTIFIC_NAME':'Acroptilon repens', \
           'COMMON_NAME':'Hardheads', 'NOTES':"Hardheads are one of the most competitive of all weeds. They can virtually exclude all other vegetation and reduce yield in dry land cropping by as much as 80%.", \
          'EPBC_ACT_STATUS':"Secured", 'FLOWERING_PERIOD': "From November to February", \
          'HABITAT': "Rocky areas - Western (Basalt) Plains"}
flora = flora.append(new_row4, ignore_index=True)

new_row5 = {'SPECIES':'Flora', 'SCIENTIFIC_NAME':'Scolymus hispanicus', \
           'COMMON_NAME':'Golden thistle', 'NOTES':"The stout spines of the golden thistle are present for much of the year and may injure animals.", \
          'EPBC_ACT_STATUS':"Secured", 'FLOWERING_PERIOD': "From November to February", \
          'HABITAT': "Rocky areas - Western (Basalt) Plains"}
flora = flora.append(new_row5, ignore_index=True)

new_row6 = {'SPECIES':'Flora', 'SCIENTIFIC_NAME':'Opuntia robusta', \
           'COMMON_NAME':'Wheel cactus', 'NOTES':"Being long-lived, patches of the plant also provide effective and permanent harbour for pest animals such as rabbits.", \
          'EPBC_ACT_STATUS':"Secured", 'FLOWERING_PERIOD': "From October to November", \
          'HABITAT': "Rocky areas - Western (Basalt) Plains"}
flora = flora.append(new_row6, ignore_index=True)

flora

Unnamed: 0,SPECIES,SCIENTIFIC_NAME,COMMON_NAME,NOTES,EPBC_ACT_STATUS,FLOWERING_PERIOD,HABITAT
0,Flora,Cullen parvum,Small Scurf-Pea,Only occurs in Vic and SA in grassland and gra...,Endangered,From October to February,Woodland - Grassy Eucalypt Woodland
1,Flora,Dianella amoena,Matted Flax-Lily,Summer flowering large herb up to 5m wide with...,Endangered,From December to February,Woodland - Grassy Eucalypt Woodland
2,Flora,Diuris sp. aff. chryseopsis (Basalt Plains),Clumping Golden Moths,Known only from Vic on basalt plains north and...,Endangered,From September to October,Rocky areas - Western (Basalt) Plains
3,Flora,Diuris fragrantissima,"Sunshine Diuris, White Diuris, Fragrant Double...",Only one known remaining population to the nor...,Endangered,From October to November,Rocky areas - Western (Basalt) Plains
4,Flora,Lepidium hyssopifolium,Basalt Pepper-Cress,Vic distribution from only three areas in cent...,Endangered,From December to February,Rocky areas - Western (Basalt) Plains
5,Flora,Leucochrysum albicans var. tricolor,Hoary Sunray,"Known from NSW, ACT, Vic and Tas. Distribution...",Endangered,From September to February,Tussock Plains - lowland tussock grassland
6,Flora,Pimelea spinescens subsp. spinescens,"Plains Rice-Flower, Spiny Rice-Flower, Prickly...",Endemic to south-western and central Vic. Stun...,Critically Endangered,From April to August,Tussock Plains - lowland tussock grassland
7,Flora,Prasophyllum diversiflorum,Gorae Leek-Orchid,"Endemic to south-western Vic, known from only ...",Endangered,From December to February,Wetland - seasonal herbaceous wetlands
8,Flora,Prasophyllum frenchii,"Maroon Leek-Orchid, Slaty Leek-Orchid, Stout L...",Distributed in Vic and far south- eastern SA. ...,Endangered,From October to December,Wetland - seasonal herbaceous wetlands
9,Flora,Prasophyllum suaveolens,Fragrant Leek-Orchid,Known only from western Vic. Slender orchid to...,Endangered,From October to November,Woodland - Grassy Eucalypt Woodland


In [58]:
# output the table as csv file
flora.to_csv('Flora-Checklist1.csv')