In [1]:
# Make sure pandas is loaded
import pandas as pd

# Read in the survey CSV
surveys_df = pd.read_csv("data/surveys.csv")

In [2]:
# TIP: use the .head() method we saw earlier to make output shorter
# Method 1: select a 'subset' of the data using the column name
surveys_df['species_id']

# Method 2: use the column name as an 'attribute'; gives the same output
surveys_df.species_id

0         NL
1         NL
2         DM
3         DM
4         DM
        ... 
35544     AH
35545     AH
35546     RM
35547     DO
35548    NaN
Name: species_id, Length: 35549, dtype: object

In [3]:
# Creates an object, surveys_species, that only contains the `species_id` column
surveys_species = surveys_df['species_id']

In [4]:
# Select the species and plot columns from the DataFrame
surveys_df[['species_id', 'plot_id']]

# What happens when you flip the order?
surveys_df[['plot_id', 'species_id']]

# What happens if you ask for a column that doesn't exist?
surveys_df['speciess']

KeyError: 'speciess'

In [5]:
# Create a list of numbers:
a = [1, 2, 3, 4, 5]

In [6]:
# Select rows 0, 1, 2 (row 3 is not selected)
surveys_df[0:3]

Unnamed: 0,record_id,month,day,year,plot_id,species_id,sex,hindfoot_length,weight
0,1,7,16,1977,2,NL,M,32.0,
1,2,7,16,1977,3,NL,M,33.0,
2,3,7,16,1977,2,DM,F,37.0,


In [7]:
# Select the first 5 rows (rows 0, 1, 2, 3, 4)
surveys_df[:5]

# Select the last row of the DataFrame
# (the slice starts at the last row and runs to the end of the DataFrame)
surveys_df[-1:]

Unnamed: 0,record_id,month,day,year,plot_id,species_id,sex,hindfoot_length,weight
35548,35549,12,31,2002,5,,,,


In [8]:
# Using the 'copy()' method: true_copy_surveys_df is a separate object
true_copy_surveys_df = surveys_df.copy()

# Using the '=' operator: ref_surveys_df is just another name for surveys_df
ref_surveys_df = surveys_df

In [9]:
# Assign the value `0` to the first three rows of data in the DataFrame
ref_surveys_df[0:3] = 0

In [10]:
# ref_surveys_df was created using the '=' operator
ref_surveys_df.head()

# surveys_df is the original dataframe
surveys_df.head()

Unnamed: 0,record_id,month,day,year,plot_id,species_id,sex,hindfoot_length,weight
0,0,0,0,0,0,0,0,0.0,0.0
1,0,0,0,0,0,0,0,0.0,0.0
2,0,0,0,0,0,0,0,0.0,0.0
3,4,7,16,1977,7,DM,M,36.0,
4,5,7,16,1977,3,DM,M,35.0,


In [11]:
surveys_df = pd.read_csv("data/surveys.csv")
# iloc[row slicing, column slicing]
surveys_df.iloc[0:3, 1:4]

Unnamed: 0,month,day,year
0,7,16,1977
1,7,16,1977
2,7,16,1977


In [12]:
# Select all columns for rows of index values 0 and 10
surveys_df.loc[[0, 10], :]

# What does this do?
surveys_df.loc[0, ['species_id', 'plot_id', 'weight']]

# What happens when you type the code below?
surveys_df.loc[[0, 10, 35549], :]

KeyError: '[35549] not in index'

In [14]:
surveys_df[surveys_df.year == 2002]
surveys_df[surveys_df.year != 2002]
surveys_df[(surveys_df.year >= 1980) & (surveys_df.year <= 1985)]

Unnamed: 0,record_id,month,day,year,plot_id,species_id,sex,hindfoot_length,weight
2270,2271,1,15,1980,8,DO,M,35.0,53.0
2271,2272,1,15,1980,11,PF,F,16.0,10.0
2272,2273,1,15,1980,18,DM,F,34.0,33.0
2273,2274,1,15,1980,11,DM,M,38.0,37.0
2274,2275,1,15,1980,8,DO,F,33.0,29.0
...,...,...,...,...,...,...,...,...,...
11222,11223,12,8,1985,4,DM,M,36.0,40.0
11223,11224,12,8,1985,11,DM,M,37.0,49.0
11224,11225,12,8,1985,7,PE,M,20.0,18.0
11225,11226,12,8,1985,1,DM,M,38.0,47.0


In [19]:
# What does this do?
empty_weights = surveys_df[pd.isnull(surveys_df['weight'])]['weight']
print(empty_weights)

0       NaN
1       NaN
2       NaN
3       NaN
4       NaN
         ..
35530   NaN
35543   NaN
35544   NaN
35545   NaN
35548   NaN
Name: weight, Length: 3266, dtype: float64


In [20]:
#==========================================================
# Make sure pandas is loaded
import pandas as pd

# Note that pd.read_csv is used because we imported pandas as pd
surveys_df = pd.read_csv("data/surveys.csv")
type(surveys_df)
surveys_df['sex'].dtype
surveys_df['record_id'].dtype

dtype('int64')

In [21]:
surveys_df.dtypes

record_id            int64
month                int64
day                  int64
year                 int64
plot_id              int64
species_id          object
sex                 object
hindfoot_length    float64
weight             float64
dtype: object

In [22]:
print(5+5)
print(24-4)

10
20


In [23]:
print(5/9)
print(10/3)
# Convert a to an integer
a = 7.83
int(a)
# Convert b to a float
b = 7
float(b)

0.5555555555555556
3.3333333333333335


7.0

In [24]:
surveys_df['record_id'] = surveys_df['record_id'].astype('float64')
surveys_df['record_id'].dtype

dtype('float64')

In [25]:
surveys_df['weight'].mean()

42.672428212991356

In [26]:
# How many rows have a missing (NaN) weight value?
len(surveys_df[pd.isnull(surveys_df.weight)])
# How many rows have weight values?
# (NaN > 0 evaluates to False, so rows with a missing weight are excluded)
len(surveys_df[surveys_df.weight > 0])

32283

In [27]:
df1 = surveys_df.copy()
# Fill all NaN values with 0
df1['weight'] = df1['weight'].fillna(0)

In [28]:
df1['weight'].mean()

38.751976145601844

In [29]:
df1['weight'] = surveys_df['weight'].fillna(surveys_df['weight'].mean())

In [30]:
surveys_df = pd.read_csv("data/surveys.csv")
df_na = surveys_df.dropna()
# Write DataFrame to CSV
df_na.to_csv('surveys_complete.csv', index=False)

In [31]:
#===================================================
import pandas as pd
surveys_df = pd.read_csv("data/surveys.csv",
                         keep_default_na=False, na_values=[""])
surveys_df

Unnamed: 0,record_id,month,day,year,plot_id,species_id,sex,hindfoot_length,weight
0,1,7,16,1977,2,NL,M,32.0,
1,2,7,16,1977,3,NL,M,33.0,
2,3,7,16,1977,2,DM,F,37.0,
3,4,7,16,1977,7,DM,M,36.0,
4,5,7,16,1977,3,DM,M,35.0,
...,...,...,...,...,...,...,...,...,...
35544,35545,12,31,2002,15,AH,,,
35545,35546,12,31,2002,15,AH,,,
35546,35547,12,31,2002,10,RM,F,15.0,14.0
35547,35548,12,31,2002,7,DO,M,36.0,51.0


In [32]:
# Take the first 10 rows of the surveys table
survey_sub = surveys_df.head(10)
# Grab the last 10 rows
survey_sub_last10 = surveys_df.tail(10)
# Reset the index values so the second DataFrame appends properly
survey_sub_last10 = survey_sub_last10.reset_index(drop=True)
# The drop=True option avoids adding a new column holding the old index values

In [33]:
# Stack the DataFrames on top of each other
vertical_stack = pd.concat([survey_sub, survey_sub_last10], axis=0)

# Place the DataFrames side by side
horizontal_stack = pd.concat([survey_sub, survey_sub_last10], axis=1)

In [34]:
# Write DataFrame to CSV
vertical_stack.to_csv('data/out.csv', index=False)

In [35]:
# For kicks read our output back into Python and make sure all looks good
new_output = pd.read_csv('data/out.csv', keep_default_na=False, na_values=[""])

In [36]:
# Read in first 10 lines of surveys table
survey_sub = surveys_df.head(10)

# Import a small subset of the species data designed for this part of the lesson.
# It is stored in the data folder.
species_sub = pd.read_csv('data/speciesSubset.csv', keep_default_na=False, na_values=[""])

In [38]:
# No `how` argument is given, so pd.merge uses its default, an inner join
merged_inner = pd.merge(left=survey_sub, right=species_sub, left_on='species_id', right_on='species_id')
# In this case `species_id` is the only column name shared by both DataFrames, so if we
# skipped the `left_on` and `right_on` arguments we would still get the same result

# What's the size of the output data?
merged_inner.shape
merged_inner

Unnamed: 0,record_id,month,day,year,plot_id,species_id,sex,hindfoot_length,weight,genus,species,taxa
0,1,7,16,1977,2,NL,M,32.0,,Neotoma,albigula,Rodent
1,2,7,16,1977,3,NL,M,33.0,,Neotoma,albigula,Rodent
2,3,7,16,1977,2,DM,F,37.0,,Dipodomys,merriami,Rodent
3,4,7,16,1977,7,DM,M,36.0,,Dipodomys,merriami,Rodent
4,5,7,16,1977,3,DM,M,35.0,,Dipodomys,merriami,Rodent
5,8,7,16,1977,1,DM,M,37.0,,Dipodomys,merriami,Rodent
6,9,7,16,1977,1,DM,F,34.0,,Dipodomys,merriami,Rodent
7,7,7,16,1977,2,PE,F,,,Peromyscus,eremicus,Rodent


In [39]:
merged_left = pd.merge(left=survey_sub, right=species_sub, how='left', left_on='species_id', right_on='species_id')
merged_left

Unnamed: 0,record_id,month,day,year,plot_id,species_id,sex,hindfoot_length,weight,genus,species,taxa
0,1,7,16,1977,2,NL,M,32.0,,Neotoma,albigula,Rodent
1,2,7,16,1977,3,NL,M,33.0,,Neotoma,albigula,Rodent
2,3,7,16,1977,2,DM,F,37.0,,Dipodomys,merriami,Rodent
3,4,7,16,1977,7,DM,M,36.0,,Dipodomys,merriami,Rodent
4,5,7,16,1977,3,DM,M,35.0,,Dipodomys,merriami,Rodent
5,6,7,16,1977,1,PF,M,14.0,,,,
6,7,7,16,1977,2,PE,F,,,Peromyscus,eremicus,Rodent
7,8,7,16,1977,1,DM,M,37.0,,Dipodomys,merriami,Rodent
8,9,7,16,1977,1,DM,F,34.0,,Dipodomys,merriami,Rodent
9,10,7,16,1977,6,PF,F,20.0,,,,


In [40]:
merged_left[ pd.isnull(merged_left.genus) ]

Unnamed: 0,record_id,month,day,year,plot_id,species_id,sex,hindfoot_length,weight,genus,species,taxa
5,6,7,16,1977,1,PF,M,14.0,,,,
9,10,7,16,1977,6,PF,F,20.0,,,,


In [41]:
animals = ['lion', 'tiger', 'crocodile', 'vulture', 'hippo']
print(animals)
for creature in animals:
    print(creature)
animals = ['lion', 'tiger', 'crocodile', 'vulture', 'hippo']
for creature in animals:
    pass
print('The loop variable is now: ' + creature)

['lion', 'tiger', 'crocodile', 'vulture', 'hippo']
lion
tiger
crocodile
vulture
hippo
The loop variable is now: hippo


In [42]:
import os

# Create the folder that will hold the per-year CSV files.
# os.mkdir raises FileExistsError when the folder is already there, which made
# this cell fail on every re-run; makedirs(..., exist_ok=True) is a no-op in
# that case, so the cell can be executed repeatedly.
os.makedirs('data/yearly_files', exist_ok=True)

# List the contents of data/ to confirm the new folder is present
os.listdir('data')

['inflammation-01.csv',
 'inflammation-02.csv',
 'inflammation-03.csv',
 'inflammation-04.csv',
 'inflammation-05.csv',
 'inflammation-06.csv',
 'inflammation-07.csv',
 'inflammation-08.csv',
 'inflammation-09.csv',
 'inflammation-10.csv',
 'inflammation-11.csv',
 'inflammation-12.csv',
 'out.csv',
 'small-01.csv',
 'small-02.csv',
 'small-03.csv',
 'speciesSubset.csv',
 'surveys.csv',
 'yearly_files']

In [43]:
import pandas as pd

# Load the data into a DataFrame
surveys_df = pd.read_csv('data/surveys.csv')

# Select only data for the year 2002
surveys2002 = surveys_df[surveys_df.year == 2002]

# Write the new DataFrame to a CSV file
surveys2002.to_csv('data/yearly_files/surveys2002.csv')

In [44]:
surveys_df['year']

0        1977
1        1977
2        1977
3        1977
4        1977
         ... 
35544    2002
35545    2002
35546    2002
35547    2002
35548    2002
Name: year, Length: 35549, dtype: int64

In [45]:
surveys_df['year'].unique()

array([1977, 1978, 1979, 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987,
       1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
       1999, 2000, 2001, 2002], dtype=int64)

In [46]:
for year in surveys_df['year'].unique():
   filename='data/yearly_files/surveys' + str(year) + '.csv'
   print(filename)

data/yearly_files/surveys1977.csv
data/yearly_files/surveys1978.csv
data/yearly_files/surveys1979.csv
data/yearly_files/surveys1980.csv
data/yearly_files/surveys1981.csv
data/yearly_files/surveys1982.csv
data/yearly_files/surveys1983.csv
data/yearly_files/surveys1984.csv
data/yearly_files/surveys1985.csv
data/yearly_files/surveys1986.csv
data/yearly_files/surveys1987.csv
data/yearly_files/surveys1988.csv
data/yearly_files/surveys1989.csv
data/yearly_files/surveys1990.csv
data/yearly_files/surveys1991.csv
data/yearly_files/surveys1992.csv
data/yearly_files/surveys1993.csv
data/yearly_files/surveys1994.csv
data/yearly_files/surveys1995.csv
data/yearly_files/surveys1996.csv
data/yearly_files/surveys1997.csv
data/yearly_files/surveys1998.csv
data/yearly_files/surveys1999.csv
data/yearly_files/surveys2000.csv
data/yearly_files/surveys2001.csv
data/yearly_files/surveys2002.csv


In [48]:
# Load the data into a DataFrame
surveys_df = pd.read_csv('data/surveys.csv')

for year in surveys_df['year'].unique():

    # Select data for the year
    surveys_year = surveys_df[surveys_df.year == year]

    # Write the new DataFrame to a CSV file
    filename = 'data/yearly_files/surveys' + str(year) + '.csv'
    surveys_year.to_csv(filename)

In [49]:
filename = 'data/yearly_files/surveys' + str(year) + '.csv'
def this_is_the_function_name(input_argument1, input_argument2):
    """
    Print both arguments to the screen, then return their product.

    input_argument1 -- first value to print and multiply
    input_argument2 -- second value to print and multiply
    """

    # Report the inputs from inside the function body
    print('The function arguments are:', input_argument1, input_argument2, '(this is done inside the function!)')

    # Hand the product back to the caller
    product = input_argument1 * input_argument2
    return product
product_of_inputs = this_is_the_function_name(2, 5)
print('Their product is:', product_of_inputs, '(this is done outside the function!)')


The function arguments are: 2 5 (this is done inside the function!)
Their product is: 10 (this is done outside the function!)


In [50]:
def one_year_csv_writer(this_year, all_data):
    """
    Save the rows belonging to a single year to their own csv file.

    this_year -- year whose rows are extracted
    all_data -- DataFrame with multi-year data (must have a `year` column)

    The file is written to data/yearly_files/function_surveys<this_year>.csv.
    """

    # Build the output path from the requested year
    out_path = ''.join(['data/yearly_files/function_surveys', str(this_year), '.csv'])

    # Keep only the rows whose year matches, then write them out
    is_this_year = all_data['year'] == this_year
    rows_for_year = all_data.loc[is_this_year]
    rows_for_year.to_csv(out_path)

In [51]:
one_year_csv_writer?
one_year_csv_writer(2002, surveys_df)

In [52]:
def yearly_data_csv_writer(start_year, end_year, all_data):
    """
    Write one CSV file per year for every year from start_year to end_year.

    start_year -- the first year of data we want
    end_year -- the last year of data we want (inclusive)
    all_data -- DataFrame with multi-year data
    """

    # range() stops just before its upper bound, so add 1 to include end_year
    years_wanted = range(start_year, end_year + 1)

    for current_year in years_wanted:
        one_year_csv_writer(current_year, all_data)

In [53]:
# Load the data into a DataFrame
surveys_df = pd.read_csv('data/surveys.csv')

# Create CSV files
yearly_data_csv_writer(1977, 2002, surveys_df)

In [54]:
def yearly_data_arg_test(all_data, start_year=1977, end_year=2002):
    """
    Demonstrate default argument values (adapted from yearly_data_csv_writer).

    all_data -- DataFrame with multi-year data (accepted but not used here)
    start_year -- the first year of data we want (default 1977)
    end_year -- the last year of data we want (default 2002)

    Returns the pair (start_year, end_year) exactly as received.
    """

    year_bounds = (start_year, end_year)
    return year_bounds


start, end = yearly_data_arg_test(surveys_df, 1988, 1993)
print('Both optional arguments:\t', start, end)

start, end = yearly_data_arg_test(surveys_df)
print('Default values:\t\t\t', start, end)

Both optional arguments:	 1988 1993
Default values:			 1977 2002


In [55]:
def yearly_data_arg_test(all_data, start_year=None, end_year=None):
    """
    Modified from yearly_data_csv_writer to test default argument values!

    all_data -- DataFrame with multi-year data (must have a `year` column)
    start_year -- the first year of data we want; when None, the earliest
                  year found in all_data is used (default None)
    end_year -- the last year of data we want; when None, the latest
                year found in all_data is used (default None)

    Returns the pair (start_year, end_year).
    """

    # Use the vectorized Series.min()/max() rather than the Python builtins:
    # the builtins loop over the column one element at a time and give
    # order-dependent results if a NaN is present, whereas the Series methods
    # skip missing values.
    if start_year is None:
        start_year = all_data['year'].min()
    if end_year is None:
        end_year = all_data['year'].max()

    return start_year, end_year


start, end = yearly_data_arg_test(surveys_df, 1988, 1993)
print('Both optional arguments:\t', start, end)

start, end = yearly_data_arg_test(surveys_df)
print('Default values:\t\t\t', start, end)

Both optional arguments:	 1988 1993
Default values:			 1977 2002


In [56]:
a = 5

if a<0:  # Meets first condition?

    # if a IS less than zero
    print('a is a negative number')

elif a>0:  # Did not meet first condition. meets second condition?

    # if a ISN'T less than zero and IS more than zero
    print('a is a positive number')

else:  # Met neither condition

    # if a ISN'T less than zero and ISN'T more than zero
    print('a must be zero!')

a is a positive number


In [57]:
start, end = yearly_data_arg_test(surveys_df)
print('Default values:\t\t\t', start, end)

start, end = yearly_data_arg_test(surveys_df, 1988, 1993)
print('No keywords:\t\t\t', start, end)

start, end = yearly_data_arg_test(surveys_df, start_year=1988, end_year=1993)
print('Both keywords, in order:\t', start, end)

start, end = yearly_data_arg_test(surveys_df, end_year=1993, start_year=1988)
print('Both keywords, flipped:\t\t', start, end)

start, end = yearly_data_arg_test(surveys_df, start_year=1988)
print('One keyword, default end:\t', start, end)

start, end = yearly_data_arg_test(surveys_df, end_year=1993)
print('One keyword, default start:\t', start, end)

Default values:			 1977 2002
No keywords:			 1988 1993
Both keywords, in order:	 1988 1993
Both keywords, flipped:		 1988 1993
One keyword, default end:	 1988 2002
One keyword, default start:	 1977 1993


In [59]:
#================================================================
%matplotlib inline
import plotnine as p9

In [60]:
import pandas as pd

surveys_complete = pd.read_csv('data/surveys.csv')
surveys_complete = surveys_complete.dropna()

In [61]:
(p9.ggplot(data=surveys_complete))
(p9.ggplot(data=surveys_complete,
           mapping=p9.aes(x='weight', y='hindfoot_length')))
(p9.ggplot(data=surveys_complete,
           mapping=p9.aes(x='weight', y='hindfoot_length'))
    + p9.geom_point()
)
# Create
surveys_plot = p9.ggplot(data=surveys_complete,
                         mapping=p9.aes(x='weight', y='hindfoot_length'))

# Draw the plot
surveys_plot + p9.geom_point()
(p9.ggplot(data=surveys_complete,
           mapping=p9.aes(x='weight', y='hindfoot_length'))
    + p9.geom_point()
)
(p9.ggplot(data=surveys_complete,
           mapping=p9.aes(x='weight', y='hindfoot_length'))
    + p9.geom_point(alpha=0.1)
)
(p9.ggplot(data=surveys_complete,
           mapping=p9.aes(x='weight', y='hindfoot_length'))
    + p9.geom_point(alpha=0.1, color='blue')
)
(p9.ggplot(data=surveys_complete,
           mapping=p9.aes(x='weight',
                          y='hindfoot_length',
                          color='species_id'))
    + p9.geom_point(alpha=0.1)
)

TypeError: set_ticks() takes 2 positional arguments but 3 were given

In [62]:
(p9.ggplot(data=surveys_complete,
           mapping=p9.aes(x='weight', y='hindfoot_length', color='species_id'))
    + p9.geom_point(alpha=0.1)
    + p9.xlab("Weight (g)")
)

TypeError: set_ticks() takes 2 positional arguments but 3 were given

In [63]:
(p9.ggplot(data=surveys_complete,
           mapping=p9.aes(x='weight', y='hindfoot_length', color='species_id'))
    + p9.geom_point(alpha=0.1)
    + p9.xlab("Weight (g)")
    + p9.scale_x_log10()
)

TypeError: set_ticks() takes 2 positional arguments but 3 were given

In [64]:
(p9.ggplot(data=surveys_complete,
           mapping=p9.aes(x='weight', y='hindfoot_length', color='species_id'))
    + p9.geom_point(alpha=0.1)
    + p9.xlab("Weight (g)")
    + p9.scale_x_log10()
    + p9.theme_bw()
    + p9.theme(text=p9.element_text(size=16))
)

TypeError: set_ticks() takes 2 positional arguments but 3 were given

In [65]:
(p9.ggplot(data=surveys_complete,
           mapping=p9.aes(x='species_id',
                          y='weight'))
    + p9.geom_boxplot()
)

TypeError: set_ticks() takes 2 positional arguments but 3 were given

In [66]:
(p9.ggplot(data=surveys_complete,
           mapping=p9.aes(x='species_id',
                          y='weight'))
    + p9.geom_jitter(alpha=0.2)
    + p9.geom_boxplot(alpha=0.)
)

TypeError: set_ticks() takes 2 positional arguments but 3 were given

In [67]:
yearly_counts = surveys_complete.groupby(['year', 'species_id'])['species_id'].count()
yearly_counts

year  species_id
1977  DM            181
      DO             12
      DS             29
      OL              1
      OX              2
                   ... 
2002  PP            375
      RM             20
      RO              7
      SF              5
      SH              9
Name: species_id, Length: 332, dtype: int64

In [68]:
yearly_counts = yearly_counts.reset_index(name='counts')
yearly_counts

Unnamed: 0,year,species_id,counts
0,1977,DM,181
1,1977,DO,12
2,1977,DS,29
3,1977,OL,1
4,1977,OX,2
...,...,...,...
327,2002,PP,375
328,2002,RM,20
329,2002,RO,7
330,2002,SF,5


In [69]:
(p9.ggplot(data=yearly_counts,
           mapping=p9.aes(x='year',
                          y='counts'))
    + p9.geom_line()
)

TypeError: set_ticks() takes 2 positional arguments but 3 were given

In [70]:
(p9.ggplot(data=yearly_counts,
           mapping=p9.aes(x='year',
                          y='counts',
                          color='species_id'))
    + p9.geom_line()
)

TypeError: set_ticks() takes 2 positional arguments but 3 were given

In [71]:
(p9.ggplot(data=surveys_complete,
           mapping=p9.aes(x='weight',
                          y='hindfoot_length',
                          color='species_id'))
    + p9.geom_point(alpha=0.1)
)

TypeError: set_ticks() takes 2 positional arguments but 3 were given

In [72]:
(p9.ggplot(data=surveys_complete,
           mapping=p9.aes(x='weight',
                          y='hindfoot_length',
                          color='species_id'))
    + p9.geom_point(alpha=0.1)
    + p9.facet_wrap("sex")
)

TypeError: set_ticks() takes 2 positional arguments but 3 were given

In [73]:
(p9.ggplot(data=surveys_complete,
           mapping=p9.aes(x='weight',
                          y='hindfoot_length',
                          color='species_id'))
    + p9.geom_point(alpha=0.1)
    + p9.facet_wrap("plot_id")
)

TypeError: set_ticks() takes 2 positional arguments but 3 were given

In [74]:
# only select the years of interest
survey_2000 = surveys_complete[surveys_complete["year"].isin([2000, 2001])]

(p9.ggplot(data=survey_2000,
           mapping=p9.aes(x='weight',
                          y='hindfoot_length',
                          color='species_id'))
    + p9.geom_point(alpha=0.1)
    + p9.facet_grid("year ~ sex")
)

TypeError: set_ticks() takes 2 positional arguments but 3 were given

In [75]:
(p9.ggplot(data=surveys_complete,
           mapping=p9.aes(x='factor(year)'))
    + p9.geom_bar()
)

TypeError: set_ticks() takes 2 positional arguments but 3 were given

In [76]:
(p9.ggplot(data=surveys_complete,
           mapping=p9.aes(x='factor(year)'))
    + p9.geom_bar()
    + p9.theme_bw()
    + p9.theme(axis_text_x = p9.element_text(angle=90))
)

TypeError: set_ticks() takes 2 positional arguments but 3 were given

In [77]:
my_custom_theme = p9.theme(axis_text_x = p9.element_text(color="grey", size=10,
                                                         angle=90, hjust=.5),
                           axis_text_y = p9.element_text(color="grey", size=10))
(p9.ggplot(data=surveys_complete,
           mapping=p9.aes(x='factor(year)'))
    + p9.geom_bar()
    + my_custom_theme
)

TypeError: set_ticks() takes 2 positional arguments but 3 were given

In [78]:
my_plot = (p9.ggplot(data=surveys_complete,
           mapping=p9.aes(x='weight', y='hindfoot_length'))
    + p9.geom_point()
)
my_plot.save("scatterplot.png", width=10, height=10, dpi=300)



TypeError: set_ticks() takes 2 positional arguments but 3 were given