# Pandas Columns & Series

In [1]:
import pandas as pd

In [2]:
# Load the three example datasets used throughout this notebook.
houses = pd.read_csv("data/kc_house_data.csv")
titanic = pd.read_csv("data/titanic.csv")
# The Netflix file is pipe-delimited; its first column is used as the row index.
netflix = pd.read_csv("data/netflix_titles.csv", sep="|", index_col=0)
# Preview the first rows of the titanic frame.
titanic.head()

Unnamed: 0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
0,1,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S,2,?,"St Louis, MO"
1,1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.55,C22 C26,S,11,?,"Montreal, PQ / Chesterville, ON"
2,1,0,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S,?,?,"Montreal, PQ / Chesterville, ON"
3,1,0,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1,2,113781,151.55,C22 C26,S,?,135,"Montreal, PQ / Chesterville, ON"
4,1,0,"Allison, Mrs. Hudson J C (Bessie Waldo Daniels)",female,25.0,1,2,113781,151.55,C22 C26,S,?,?,"Montreal, PQ / Chesterville, ON"


### Selecting a single column

In [3]:
# Indexing a DataFrame with one column label returns that column as a Series.
titanic["sex"]

0       female
1         male
2       female
3         male
4       female
         ...  
1304    female
1305    female
1306      male
1307      male
1308      male
Name: sex, Length: 1309, dtype: object

In [4]:
# Bracket notation is required here: the column name contains a dot, so
# attribute access (titanic.home.dest) would be parsed as two lookups.
titanic["home.dest"]

0                          St Louis, MO
1       Montreal, PQ / Chesterville, ON
2       Montreal, PQ / Chesterville, ON
3       Montreal, PQ / Chesterville, ON
4       Montreal, PQ / Chesterville, ON
                     ...               
1304                                  ?
1305                                  ?
1306                                  ?
1307                                  ?
1308                                  ?
Name: home.dest, Length: 1309, dtype: object

In [5]:
# Aggregating a DataFrame collapses each column to one value, so the result
# is a one-dimensional Series (one entry per column).
# numeric_only=True restricts the mean to numeric columns: without it, older
# pandas emits a FutureWarning and pandas >= 2.0 raises TypeError when the
# frame holds non-numeric columns. This also matches the
# houses.min(numeric_only=True) call used later in this notebook.
type(houses.mean(numeric_only=True))

  type(houses.mean())


pandas.core.series.Series

In [6]:
# Aggregating a single column (a Series) yields one scalar value.
houses["price"].sum()

11672925008.0

In [7]:
# Largest value in the price column, again a single scalar.
houses["price"].max()

7700000.0

## Series properties

In [8]:
# For a DataFrame, shape is a (rows, columns) tuple.
titanic.shape

(1309, 14)

In [9]:
# Keep the price column around as a standalone Series for the cells below.
price = houses["price"]

In [10]:
# A Series is one-dimensional, so its shape is a one-element tuple.
price.shape

(21613,)

In [11]:
# Row labels of the Series; here the default integer RangeIndex.
price.index

RangeIndex(start=0, stop=21613, step=1)

In [12]:
# Column-wise minimum restricted to numeric columns; the result is a Series
# with one entry per column.
min_values = houses.min(numeric_only=True)
min_values

id               1.000102e+06
price            7.500000e+04
bedrooms         0.000000e+00
bathrooms        0.000000e+00
sqft_living      2.900000e+02
sqft_lot         5.200000e+02
floors           1.000000e+00
waterfront       0.000000e+00
view             0.000000e+00
condition        1.000000e+00
grade            1.000000e+00
sqft_above       2.900000e+02
sqft_basement    0.000000e+00
yr_built         1.900000e+03
yr_renovated     0.000000e+00
zipcode          9.800100e+04
lat              4.715590e+01
long            -1.225190e+02
sqft_living15    3.990000e+02
sqft_lot15       6.510000e+02
dtype: float64

In [13]:
# The index of an aggregated Series holds the column names it was computed from.
min_values.index

Index(['id', 'price', 'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot',
       'floors', 'waterfront', 'view', 'condition', 'grade', 'sqft_above',
       'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode', 'lat', 'long',
       'sqft_living15', 'sqft_lot15'],
      dtype='object')

## Important Series Methods

* `head()`
* `tail()`
* `describe()`
* `unique()`
* `nunique()`
* `nlargest()`
* `nsmallest()`
* `value_counts()`
* `plot()` YAY!

In [14]:
# head() shows the first five values by default.
# NOTE(review): age comes back with dtype object rather than float, presumably
# because the CSV marks missing values with "?" (seen in other columns of the
# head() preview). Confirm, and consider na_values="?" when loading.
titanic.age.head()

0        29
1    0.9167
2         2
3        30
4        25
Name: age, dtype: object

In [15]:
# tail() shows the last five values by default.
netflix.director.tail()

8802      David Fincher
8803                NaN
8804    Ruben Fleischer
8805       Peter Hewitt
8806        Mozez Singh
Name: director, dtype: object

In [16]:
# Summarize the last 10 director entries. For an object column, describe()
# reports count, unique, top and freq; missing values are left out of count.
netflix.director.tail(10).describe()

count                           7
unique                          7
top       Chandra Prakash Dwivedi
freq                            1
Name: director, dtype: object

In [17]:
# For a numeric column, describe() reports count, mean, std, min, quartiles and max.
netflix.release_year.describe()

count    8807.000000
mean     2014.180198
std         8.819312
min      1925.000000
25%      2013.000000
50%      2017.000000
75%      2019.000000
max      2021.000000
Name: release_year, dtype: float64

In [18]:
# Distinct director values, returned as a NumPy array (NaN appears as one of the values).

netflix.director.unique()

array(['Kirsten Johnson', nan, 'Julien Leclercq', ..., 'Majid Al Ansari',
       'Peter Hewitt', 'Mozez Singh'], dtype=object)

In [19]:
# Distinct zip codes as a NumPy array; the values are not sorted.
houses.zipcode.unique()

array([98178, 98125, 98028, 98136, 98074, 98053, 98003, 98198, 98146,
       98038, 98007, 98115, 98107, 98126, 98019, 98103, 98002, 98133,
       98040, 98092, 98030, 98119, 98112, 98052, 98027, 98117, 98058,
       98001, 98056, 98166, 98023, 98070, 98148, 98105, 98042, 98008,
       98059, 98122, 98144, 98004, 98005, 98034, 98075, 98116, 98010,
       98118, 98199, 98032, 98045, 98102, 98077, 98108, 98168, 98177,
       98065, 98029, 98006, 98109, 98022, 98033, 98155, 98024, 98011,
       98031, 98106, 98072, 98188, 98014, 98055, 98039])

In [24]:
# How many distinct ratings are there? dropna=True leaves missing values out of the count.
netflix.rating.nunique(dropna=True)

17

### nlargest & nsmallest

In [25]:
# The 10 lowest prices in ascending order, keeping their original row labels.
houses.price.nsmallest(10)

1149     75000.0
15293    78000.0
465      80000.0
16198    81000.0
8274     82000.0
2141     82500.0
18468    83000.0
3767     84000.0
10253    85000.0
16714    85000.0
Name: price, dtype: float64