# DataFrames 101

## Selecting subsets

In [None]:
%matplotlib inline
import geopandas as gpd

## Referencing series (columns, fields, series, attributes, properties)

Method 1 - dot notation

    df.fieldname

Method 2 - bracket notation

    df['fieldname']
    
We will be using bracket notation for this course.  I think dot notation is confusing because it could be misinterpreted as a method or property.  Also, you can't use a variable to refer to a column name with dot notation, which limits its usefulness.

In [None]:
# Read the raptor nest shapefile into a GeoDataFrame.
raptors = gpd.read_file("data/Raptor_Nests.shp")
# Method 1 (dot notation): access the 'recentspec' column as an attribute.
raptors.recentspec

In [None]:
raptors['recentspec']

You can refer to a column using a variable with bracket notation

In [None]:
fldname = 'recentstat'
raptors[fldname]

But you can't use a variable with dot notation

In [None]:
# Dot notation looks up an attribute literally named "fldname"; it does not
# substitute the value stored in the `fldname` variable. Unless the data
# happens to contain a column with that exact name, this is expected to raise
# AttributeError -- which is the point of this demonstration.
raptors.fldname

You can get a list of column names using the DataFrame's `columns` attribute

In [None]:
raptors.columns

And knowing this, we can loop through the columns and print out the unique values for all string columns that have fewer than 20 unique values with just a few lines of code

In [None]:
# Walk every column and report the distinct values of the text ('object')
# columns that have fewer than 20 distinct values.
for fld in raptors.columns:
    # Skip anything that is not stored as generic Python objects (strings).
    if raptors[fld].dtype != 'object':
        continue
    if raptors[fld].nunique() < 20:
        print(fld, raptors[fld].unique())

### Subset of columns

In [None]:
raptors.head()

In [None]:
raptor_cols = raptors[["Nest_ID", "recentspec", "recentstat", "lastsurvey", 'geometry']]
raptor_cols.tail()

In [None]:
%whos

### Subset by attributes (multiple conditions)

* Wrap each condition in parentheses
* Use & and | rather than **and** and **or**

In [None]:
raptors[(raptors['recentstat']=='ACTIVE NEST') | (raptors['recentspec']=='Swainsons Hawk')]

### Subset by coordinates

Select the features that fall inside a bounding box using the `cx` indexer and Python slice notation (`x_min:x_max, y_min:y_max`)

In [None]:
raptors.plot()

In [None]:
raptors.cx[:-104.5, 39.5:].plot()

In [None]:
# Keep only the features inside the box x <= -104.3, 39.5 <= y <= 40.6 and
# rebind `raptors` to that subset for the rest of the notebook.
#
# `.cx` returns a selection of the original GeoDataFrame. Later cells modify
# `raptors` in place (cell assignment, new columns, drop/set_index with
# inplace=True), so take an explicit copy here: the edits then operate on an
# independent frame and avoid pandas' SettingWithCopyWarning.
raptors = raptors.cx[:-104.3, 39.5:40.6].copy()
raptors.plot()

In [None]:
# Split the latitude range of the data into three equal-height bands and
# plot the features of each band separately.
lat = raptors["lat_y_dd"]
y_min, y_max = lat.min(), lat.max()
y_step = (y_max - y_min) / 3
for i in range(3):
    band_bottom = y_min + i*y_step
    band_top = y_min + (i+1)*y_step
    # An open x slice keeps every longitude; only y is restricted.
    raptors.cx[:, band_bottom:band_top].plot()
    

In [None]:
# Divide the data's extent into a 3 x 3 grid and plot each tile on its own.
lat = raptors["lat_y_dd"]
lon = raptors["long_x_dd"]

y_min, y_max = lat.min(), lat.max()
y_step = (y_max - y_min) / 3
x_min, x_max = lon.min(), lon.max()
x_step = (x_max - x_min) / 3

# Rows (y) form the outer loop and columns (x) the inner loop, so tiles are
# drawn west-to-east within each band, bands from south to north.
for y in range(3):
    tile_bottom = y_min + y*y_step
    tile_top = y_min + (y+1)*y_step
    for x in range(3):
        tile_left = x_min + x*x_step
        tile_right = x_min + (x+1)*x_step
        raptors.cx[tile_left:tile_right, tile_bottom:tile_top].plot()

## Referencing cell values with loc, iloc, and at

### loc - referencing by column and row index values

In [None]:
raptors.columns

In [None]:
raptors.index

In [None]:
raptors

In [None]:
raptors.loc[877, 'lastsurvey']

In [None]:
raptors.loc[3:10, 'lastsurvey':'Nest_ID']

In [None]:
raptors.loc[[10, 3, 5], ['Nest_ID', 'lat_y_dd', 'recentspec']]

In [None]:
# Visit every cell by its row label and column name, printing the labels
# alongside the value that `loc` returns for that pair.
for row in raptors.index:
    for col in raptors.columns:
        cell_value = raptors.loc[row, col]
        print(row, col, cell_value)

### iloc - referencing by integers

The `shape` property returns a tuple with the number of rows and the number of columns

In [None]:
raptors.shape

In [None]:
raptors.iloc[861, 4]

In [None]:
raptors.iloc[100:200, 2:4]

In [None]:
raptors.iloc[[103, 197, 200], [1, 2, 5]]

In [None]:
# Visit every cell by integer position: x is the row position and y is the
# column position, both counted from zero.
row_count, col_count = raptors.shape
for x in range(row_count):
    for y in range(col_count):
        cell_value = raptors.iloc[x, y]
        print(x, y, cell_value)

### at and iat - referencing single cell

Similar to loc and iloc but can only be used to reference a single cell

Values can be assigned

In [None]:
# Same traversal as with iloc, but using `iat`, the accessor that addresses
# exactly one cell by integer row/column position.
row_count, col_count = raptors.shape
for x in range(row_count):
    for y in range(col_count):
        cell_value = raptors.iat[x, y]
        print(x, y, cell_value)

In [None]:
raptors.iat[200, 4]

In [None]:
raptors['recentspec'].unique()

In [None]:
# Overwrite the single cell at row position 200, column position 4, then read
# it back to confirm the assignment.
# NOTE(review): column position 4 is presumably 'recentspec' (the cells
# before and after this one list that column's unique values) -- confirm
# against raptors.columns.
raptors.iat[200,4] = "Great Blue Heron"
raptors.iat[200,4]

In [None]:
raptors['recentspec'].unique()

## Series Math

In [None]:
raptors['long_x_dd']/raptors['lat_y_dd']

### Creating a new column

In [None]:
raptors['long_plus_lat'] = raptors['long_x_dd']+raptors['lat_y_dd']

In [None]:
raptors

### dropping a row

In [None]:
raptors

In [None]:
raptors.drop(877, inplace=True)

### dropping a column

In [None]:
raptors.drop('long_plus_lat', axis=1, inplace=True)

## Choosing a column to use as an index

In [None]:
raptors.set_index('Nest_ID', verify_integrity=True, inplace=True)

In [None]:
raptors.drop(1002, inplace=True)

In [None]:
raptors

In [None]:
raptors.sort_values(['recentspec', 'recentstat', 'lastsurvey'], ascending=[True, True, False])