In [1]:
import arcpy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

DBPATH=r"C:\Users\mike\Documents\OSU\GEOG561\Project - Florida Scrub Zone\FL_Zone_9a_9b_Scrub_Incursion_Restoration\FL_Zone_9a_9b_Scrub_Incursion_Restoration.gdb"
arcpy.env.workspace=DBPATH

In [2]:
knownScrubSpeciesObs = numpy.asarray(arcpy.da.TableToNumPyArray('csKnownScrubZone9', 'species').tolist()).flatten()
knownScrubSpeciesObsHist = (pd.Series(knownScrubSpeciesObs).value_counts())
knownScrubSpeciesObsHist

Serenoa repens            82
Palafoxia feayi           80
Ceratiola ericoides       75
Quercus geminata          68
Ximenia americana         66
Lyonia ferruginea         59
Asclepias tuberosa        59
Pinus clausa              56
Opuntia humifusa          55
Balduina angustifolia     54
Quercus myrtifolia        49
Vaccinium myrsinites      48
Hypericum tenuifolium     47
Bejaria racemosa          45
Quercus chapmanii         43
Pityopsis graminifolia    42
Persea humilis            35
Commelina erecta          34
Polygonella robusta       31
Carya floridana           31
Piloblephis rigida        31
Licania michauxii         29
Quercus inopina           26
Lyonia fruticosa          26
Dalea feayi               24
Yucca filamentosa         24
Ilex opaca                24
Sabal etonia              23
Asimina obovata           22
Cartrema floridana        13
Vaccinium darrowii        12
Chrysopsis floridana      11
Conradina grandiflora      9
Trichostema dichotomum     5
Zamia integrif

In [None]:
knownScrubSpeciesObsHistP = knownScrubSpeciesObsHist/len(knownScrubSpeciesObs)
knownScrubSpeciesObsHistP.plot(kind='bar')
plt.hlines((.01, .02, .03, .04, .05, .06), 0, 1000)
plt.title("Citizen Science Observations in Known Scrub Areas, Total Observations: %s" % len(knownScrubSpeciesObs)) 
plt.show()

  fig = self.plt.figure(figsize=self.figsize)


In [3]:
clusterIds = numpy.asarray(arcpy.da.TableToNumPyArray('csScrubPlantsZone9Communities_poly', 'CLUSTER_ID').tolist()).flatten()
clusterIds

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

In [4]:
#This takes a few min to run.
plantCommunities = arcpy.da.TableToNumPyArray('csScrubPlantsZone9Communities',  ('CLUSTER_ID','species'))

plantCommunities_df = pd.DataFrame(plantCommunities)
plantCommunityHists = {}
plantCommunity = {}
for cluster_id in clusterIds:
    plantCommunity[cluster_id] = plantCommunities_df.loc[plantCommunities_df['CLUSTER_ID']==cluster_id]
    plantCommunityHists[cluster_id] = plantCommunity[cluster_id].value_counts()

In [5]:
for cluster_id in clusterIds:
    plantCommunityHists[cluster_id].plot(kind='bar')
    plt.title("Distribution of Citizen Science Observations in Cluster %s" % cluster_id) 
    plt.subplots_adjust(bottom=.5)
    plt.show()   


In [6]:
contigency = pd.crosstab(plantCommunities_df['CLUSTER_ID'], plantCommunities_df['species'])


In [9]:
import scipy
from scipy import stats
alpha =  .2

stat, p, dof, expected = scipy.stats.chi2_contingency(contigency)
print("stat, p, dof")
print(stat, p, dof)


if p <= alpha:
    print('reject H0')
else:
    print('fail to reject H0')


stat, p, dof
1512.825711521683 6.433374686196485e-147 341
reject H0


In [11]:
scipy.stats.chi2.ppf(.8, 341)

362.76220277757153

In [12]:
plants=list(np.unique(knownScrubSpeciesObs))
plants

['Asclepias tuberosa', 'Asimina obovata', 'Balduina angustifolia', 'Bejaria racemosa', 'Cartrema floridana', 'Carya floridana', 'Ceratiola ericoides', 'Chrysopsis floridana', 'Commelina erecta', 'Conradina grandiflora', 'Dalea feayi', 'Hypericum tenuifolium', 'Ilex opaca', 'Licania michauxii', 'Lyonia ferruginea', 'Lyonia fruticosa', 'Opuntia humifusa', 'Palafoxia feayi', 'Persea humilis', 'Piloblephis rigida', 'Pinus clausa', 'Pityopsis graminifolia', 'Polygonella robusta', 'Quercus chapmanii', 'Quercus geminata', 'Quercus inopina', 'Quercus myrtifolia', 'Sabal etonia', 'Serenoa repens', 'Trichostema dichotomum', 'Vaccinium darrowii', 'Vaccinium myrsinites', 'Ximenia americana', 'Yucca filamentosa', 'Zamia integrifolia']

In [13]:
knownScrubSpeciesObsSub=np.random.choice(knownScrubSpeciesObs, int(len(knownScrubSpeciesObs)*.3))

In [14]:
knownScrubSpeciesObsSub

array(['Balduina angustifolia', 'Lyonia ferruginea', 'Serenoa repens',
       'Hypericum tenuifolium', 'Quercus geminata', 'Licania michauxii',
       'Ximenia americana', 'Asclepias tuberosa', 'Hypericum tenuifolium',
       'Dalea feayi', 'Piloblephis rigida', 'Dalea feayi',
       'Serenoa repens', 'Balduina angustifolia', 'Serenoa repens',
       'Opuntia humifusa', 'Polygonella robusta', 'Persea humilis',
       'Hypericum tenuifolium', 'Lyonia ferruginea', 'Lyonia ferruginea',
       'Persea humilis', 'Ceratiola ericoides', 'Asimina obovata',
       'Carya floridana', 'Ximenia americana', 'Serenoa repens',
       'Yucca filamentosa', 'Ximenia americana', 'Pinus clausa',
       'Ceratiola ericoides', 'Quercus myrtifolia', 'Dalea feayi',
       'Licania michauxii', 'Balduina angustifolia', 'Opuntia humifusa',
       'Balduina angustifolia', 'Licania michauxii',
       'Conradina grandiflora', 'Lyonia ferruginea', 'Ximenia americana',
       'Licania michauxii', 'Balduina angustifol

In [15]:
knownScrubSpeciesObsHist = numpy.histogram(knownScrubSpeciesObsSub, bins=plants)

In [17]:
plantCommunitiesHist = {}
alpha =  .2

print("cluster_id, stat, p, dof, alpha, result")

for cluster_id in clusterIds:
    plantCommunitiesHist[cluster_id] = numpy.histogram(plantCommunities_df.loc[plantCommunities_df['CLUSTER_ID']==cluster_id]['species'], 
                        bins=plants)
   
    dof = len(knownScrubSpeciesObsHist[0]) - 1 
    stat, p = scipy.stats.chisquare(plantCommunitiesHist[cluster_id][0], knownScrubSpeciesObsHist[0])
    
    if p <= alpha:
        result = 'reject H0'
    else:
        result = 'fail to reject H0'

    print("%s, %s, %s, %s, %s, %s" % (cluster_id, stat, p, dof, alpha, result))


cluster_id, stat, p, dof, alpha, result
1, 325.47486957486956, 8.512986392018349e-50, 33, 0.2, reject H0
2, 334.6794844044844, 1.3116501514517456e-51, 33, 0.2, reject H0
3, 250.4375152625153, 2.9766912045726127e-35, 33, 0.2, reject H0
4, 1502.0213369963367, 5.0886544786953205e-295, 33, 0.2, reject H0
5, 342.7192945942946, 3.394437967004731e-53, 33, 0.2, reject H0
6, 231.93919136419137, 9.525096642485425e-32, 33, 0.2, reject H0
7, 279.6875457875458, 7.236257384067044e-41, 33, 0.2, reject H0
8, 312.4017343767344, 3.1246646166678903e-47, 33, 0.2, reject H0
9, 338.24609557109557, 2.595583318905205e-52, 33, 0.2, reject H0
10, 345.5241230991231, 9.467354595188875e-54, 33, 0.2, reject H0
11, 349.83358308358305, 1.3284949545928818e-54, 33, 0.2, reject H0
12, 351.99045676545677, 4.967308106789749e-55, 33, 0.2, reject H0


In [246]:
for cluster_id in clusterIds:
    print(plantCommunitiesHist[cluster_id])

(array([0, 0, 2, 3, 0, 0, 9, 0, 0, 0, 2, 0, 1, 2, 8, 0, 0, 0, 0, 0, 1, 0,
       0, 2, 4, 0, 4, 1, 3, 0, 0, 3, 0, 0], dtype=int64), array(['Asclepias tuberosa', 'Asimina obovata', 'Balduina angustifolia',
       'Bejaria racemosa', 'Cartrema floridana', 'Carya floridana',
       'Ceratiola ericoides', 'Chrysopsis floridana', 'Commelina erecta',
       'Conradina grandiflora', 'Dalea feayi', 'Hypericum tenuifolium',
       'Ilex opaca', 'Licania michauxii', 'Lyonia ferruginea',
       'Lyonia fruticosa', 'Opuntia humifusa', 'Palafoxia feayi',
       'Persea humilis', 'Piloblephis rigida', 'Pinus clausa',
       'Pityopsis graminifolia', 'Polygonella robusta',
       'Quercus chapmanii', 'Quercus geminata', 'Quercus inopina',
       'Quercus myrtifolia', 'Sabal etonia', 'Serenoa repens',
       'Trichostema dichotomum', 'Vaccinium darrowii',
       'Vaccinium myrsinites', 'Ximenia americana', 'Yucca filamentosa',
       'Zamia integrifolia'], dtype='<U22'))
(array([1, 2, 1, 0, 2, 2, 1, 0

       'Zamia integrifolia'], dtype='<U22'))
(array([0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 2, 0, 3, 2, 0, 0, 0, 3, 0,
       0, 1, 3, 0, 1, 0, 9, 0, 0, 3, 0, 1], dtype=int64), array(['Asclepias tuberosa', 'Asimina obovata', 'Balduina angustifolia',
       'Bejaria racemosa', 'Cartrema floridana', 'Carya floridana',
       'Ceratiola ericoides', 'Chrysopsis floridana', 'Commelina erecta',
       'Conradina grandiflora', 'Dalea feayi', 'Hypericum tenuifolium',
       'Ilex opaca', 'Licania michauxii', 'Lyonia ferruginea',
       'Lyonia fruticosa', 'Opuntia humifusa', 'Palafoxia feayi',
       'Persea humilis', 'Piloblephis rigida', 'Pinus clausa',
       'Pityopsis graminifolia', 'Polygonella robusta',
       'Quercus chapmanii', 'Quercus geminata', 'Quercus inopina',
       'Quercus myrtifolia', 'Sabal etonia', 'Serenoa repens',
       'Trichostema dichotomum', 'Vaccinium darrowii',
       'Vaccinium myrsinites', 'Ximenia americana', 'Yucca filamentosa',
       'Zamia integrifolia'], 