## Exploring `Series` and `DataFrame` Objects
### Working with pandas
*Curtis Miller*

Let's create some `Series`.

In [1]:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np

In [2]:
# Two small Series built from plain Python lists (no index given, so 0..n-1 is used)
ser1 = Series(data=[1, 2, 3, 4])       # integer values
ser2 = Series(data=['a', 'b', 'c'])    # string values
print(ser1)

0    1
1    2
2    3
3    4
dtype: int64


In [3]:
# Display the string-valued Series; pandas reports its dtype as object
print(ser2)

0    a
1    b
2    c
dtype: object


In [4]:
# Build a pandas Index holding city names; it is used below to label a Series
idx = pd.Index([
    "New York",
    "Los Angeles",
    "Chicago",
    "Houston",
    "Philadelphia",
    "Phoenix",
    "San Antonio",
    "San Diego",
    "Dallas",
])
print(idx)

Index(['New York', 'Los Angeles', 'Chicago', 'Houston', 'Philadelphia',
       'Phoenix', 'San Antonio', 'San Diego', 'Dallas'],
      dtype='object')


In [None]:
# Population figures labelled by the city Index; np.nan marks the one missing value (Phoenix)
pops = Series(
    data=[8550, 3972, 2721, 2296, 1567, np.nan, 1470, 1395, 1300],
    index=idx,
    name="Population",
)
print(pops)

In [None]:
# A Series built from a dict: the keys (city names) become the index labels
state = Series(
    {
        "New York": "New York",
        "Los Angeles": "California",
        "Phoenix": "Arizona",
        "San Antonio": "Texas",
        "San Diego": "California",
        "Dallas": "Texas",
    },
    name="State",
)
print(state)

In [None]:
# Another dict-backed Series; it covers only some of the cities and adds one new label (Austin)
area = Series(
    {
        "New York": 302.6,
        "Los Angeles": 468.7,
        "Philadelphia": 134.1,
        "Phoenix": 516.7,
        "Austin": 322.48,
    },
    name="Area",
)
print(area)

Let's see some of the ways we can create `DataFrame`s, first without indices.

In [None]:
# From a NumPy array: the integers 0 through 8 arranged as a 3x3 matrix
mat = np.arange(9).reshape((3, 3))
print(mat)

In [None]:
# Wrap the array in a DataFrame without supplying any row or column labels
print(DataFrame(mat))

In [None]:
# Same data, this time with explicit row labels (index) and column labels (columns)
print(
    DataFrame(
        mat,
        index=['a', 'b', 'c'],
        columns=['alpha', 'beta', 'gamma'],
    )
)

In [None]:
# A list of tuples works like a 2D array: every tuple is one row
arr = [
    (1, 'a'),
    (2, 'b'),
    (3, 'c'),
]
print(arr)

In [None]:
# Build a DataFrame from the list of tuples, naming the two columns explicitly
print(DataFrame(arr, columns = ["Numbers", "Letters"]))

In [None]:
# From a dict of equal-length lists: each key names a column
print(
    DataFrame({
        "Numbers": [1, 2, 3],
        "Letters": ['a', 'b', 'c'],
    })
)

In [None]:
# What if not all lists are the same length?
# pandas refuses to build the DataFrame and raises a ValueError.
# The error is caught and its message printed, so the failure is still shown
# but the rest of the notebook keeps running under "Restart & Run All".
try:
    print(DataFrame({"Numbers": [1, 2, 3, 4], "Letters": ['a', 'b', 'c']}))
except ValueError as err:
    print("ValueError:", err)

In [None]:
# Do we get an error when the inputs are Series of different lengths
# (ser1 has four entries, ser2 has three)? No -- see the note on the line below.
DataFrame({"Numbers": ser1, "Letters": ser2})    # nan fills in "missing" information (Series not of same length)

Let's now create a `DataFrame` containing information about cities.

In [None]:
# When passed as a list, each Series is treated as a row
# Notice that these Series are not the same length, nor do they all have the same entries; nan will be generated
print(DataFrame([pops, state, area]))

In [None]:
# Passing the Series in a dict instead makes each one a column, named by its key
print(
    DataFrame({
        "Population": pops,
        "State": state,
        "Area": area,
    })
)

In [None]:
# Or, we could build the row-wise DataFrame and flip it with DataFrame's T (transpose) attribute
print(DataFrame([pops, state, area]).T)

How can we add new data to `Series` or `DataFrame`s?

In [None]:
# Let's append new data to the Series
# Series.append() was removed in pandas 2.0; pd.concat() is its replacement
pd.concat([pops, Series({"Seattle": 684, "Denver": 683})])     # Not done in place: a new Series is returned

In [None]:
# One row per city, one column per Series
df = DataFrame([pops, state, area]).T
# DataFrame.append() was removed in pandas 2.0; pd.concat() stacks the new rows
# underneath df and returns a new DataFrame (df itself is not modified)
pd.concat([df,
           DataFrame({"Population": Series({"Seattle": 684, "Denver": 683}),
                      "State": Series({"Seattle": "Washington", "Denver": "Colorado"}),
                      "Area": Series({"Seattle": np.nan, "Denver": np.nan})})])

In [None]:
# Add new columns by concatenating a second DataFrame side by side (axis=1);
# both new columns reuse the index of pops
pd.concat(
    [
        df,
        DataFrame({
            "Numbers": Series(np.arange(9), index=pops.index),
            "Letters": Series(['a', 'c', 'd', 'h', 'l', 'n', 'p', 'p', 's'], index=pops.index),
        }),
    ],
    axis=1,
)

Finally, we save the data to a CSV file for later use.

In [None]:
# Rebuild the city table from the three Series (rows), then transpose so each Series is a column
df = DataFrame([pops, state, area]).transpose()
# Write the table to cities.csv (a relative path)
df.to_csv("cities.csv")