# 1 Creating, reading & writing

In [1]:
import os

import pandas as pd

## DataFrame

In [2]:
# Each dict key becomes a column name and its list holds that column's values;
# with no index given, the rows are labelled 0, 1, ...
yes_no_columns = {'Yes': [50, 21], 'No': [131, 2]}
pd.DataFrame(yes_no_columns)

Unnamed: 0,Yes,No
0,50,131
1,21,2


In [3]:
# Column values do not have to be numbers: here every cell is a string.
review_columns = {
    'Bob': ['I liked it.', 'It was awful.'],
    'Sue': ['Pretty good.', 'Bland.'],
}
pd.DataFrame(review_columns)

Unnamed: 0,Bob,Sue
0,I liked it.,Pretty good.
1,It was awful.,Bland.


In [4]:
# Passing `index=` replaces the default 0..n-1 row labels with our own labels.
pd.DataFrame(
    data={
        'Bob': ['I liked it.', 'It was awful.'],
        'Sue': ['Pretty good.', 'Bland.'],
    },
    index=['Product A', 'Product B'],
)

Unnamed: 0,Bob,Sue
Product A,I liked it.,Pretty good.
Product B,It was awful.,Bland.


## Series

In [5]:
# A Series is a single sequence of values; with no `index=` given,
# the entries are labelled 0..n-1.
pd.Series(data=[1, 2, 3, 4, 5])

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [6]:
# `index=` supplies one label per value and `name=` names the Series as a whole.
sales_values = [30, 35, 40]
sales_labels = ['2015 Sales', '2016 Sales', '2017 Sales']
pd.Series(data=sales_values, index=sales_labels, name='Product A')

2015 Sales    30
2016 Sales    35
2017 Sales    40
Name: Product A, dtype: int64

In [7]:
# Load the review dataset; `index_col=0` uses the file's first column as the
# row index instead of generating a fresh 0..n-1 index.
# The file location can be overridden through the WINE_REVIEWS_CSV environment
# variable so the notebook also runs on machines where the default path below
# does not exist.
reviews_csv_path = os.environ.get(
    "WINE_REVIEWS_CSV",
    "/home/gluecksman/src/Data_and_ML/pandas/winemag-data-130k-v2.csv",
)
reviews = pd.read_csv(reviews_csv_path, index_col=0)

# Write the frame back out as CSV; the relative name means the file is created
# in the current working directory.
reviews.to_csv("just_a_fancy_test_name.csv")

# 2 Indexing, selecting & assigning

In [8]:
# Attribute access and bracket access both return the 'country' column.
reviews.country
reviews['country']

# The column is a Series, so it can be indexed further.
country_column = reviews['country']
country_column[0]     # the first entry
country_column[0:10]  # the first ten entries (the slice end is excluded)

0       Italy
1    Portugal
2          US
3          US
4          US
5       Spain
6       Italy
7      France
8     Germany
9      France
Name: country, dtype: object

### index-based selection
With `iloc`, the first element of a range is included and the last one is excluded (e.g. `0:3` selects positions 0, 1 and 2).

In [9]:
# Position-based selection with `iloc`: rows first, then columns.
reviews.iloc[0]  # the first row, i.e. the value of every column for that row

# A whole column by position: every row (":") of column 0.
reviews.iloc[:, 0]

# Two equivalent ways to take the first three rows of column 0.
reviews.iloc[0:3, 0]
reviews.iloc[[0, 1, 2], 0]

# Only the second and third entries (positions 1 and 2) of column 0.
reviews.iloc[1:3, 0]

# Negative positions count from the end: the last five rows, all columns.
reviews.iloc[-5:]

# Hand-picked rows by position, keeping every column.
picked_positions = [1, 2, 3, 5, 8]
sample_reviews = reviews.iloc[picked_positions, :]

### Label-based selection
With `loc`, ranges are inclusive on both ends: `0:3` selects the labels 0, 1, 2 and 3.

In [10]:
# Label-based selection with `loc`: keep every row (":") but only the named columns.
taster_columns = ['taster_name', 'taster_twitter_handle', 'points']
reviews.loc[:, taster_columns]

Unnamed: 0,taster_name,taster_twitter_handle,points
0,Kerin O’Keefe,@kerinokeefe,87
1,Roger Voss,@vossroger,87
2,Paul Gregutt,@paulgwine,87
3,Alexander Peartree,,87
4,Paul Gregutt,@paulgwine,87
...,...,...,...
129966,Anna Lee C. Iijima,,90
129967,Paul Gregutt,@paulgwine,90
129968,Roger Voss,@vossroger,90
129969,Roger Voss,@vossroger,90


### Manipulating the index, conditional selection & assigning data

In [11]:
# Manipulating the index: set_index returns a new frame indexed by 'title';
# the result is not assigned here, so `reviews` itself is unchanged.
reviews.set_index("title")

# Conditional selection: a comparison yields a Series of True/False values,
# one per row.
is_italy = reviews.country == 'Italy'
at_least_90_points = reviews.points >= 90
is_italy

# `loc` accepts such a boolean Series as a row filter.
reviews.loc[is_italy]                       # rows where country is 'Italy'
reviews.loc[is_italy & at_least_90_points]  # ... AND points are 90 or more
reviews.loc[is_italy | at_least_90_points]  # ... OR points are 90 or more

# isin keeps the rows whose value "is in" the given list of values.
from_italy_or_france = reviews.country.isin(['Italy', 'France'])
reviews.loc[from_italy_or_france]

# notnull / isnull: rows whose 'price' is present / missing (NaN).
reviews.loc[reviews.price.notnull()]
reviews.loc[reviews.price.isnull()]

# Assigning data: a single value is repeated for every row, while an iterable
# must supply exactly one value per row.
reviews['critic'] = 'everyone'
row_count = len(reviews)
# The step of -1 makes the range count down: row_count, row_count - 1, ..., 1.
reviews['index_backwards'] = range(row_count, 0, -1)


# 3 Summary functions & Maps