# Pandas Tutorial

### 1. how to import pandas library?

In [None]:
import pandas

### 2. how to check pandas version?

In [None]:
# Version string of the installed pandas package.
pandas.__version__

### 3. how to use pandas?

In [None]:
import pandas as pd

### 4. how to import data from a csv file?

#### *read_csv()*

In [None]:
# Load the CSV file into a DataFrame; the relative path is resolved against
# the current working directory.
df = pd.read_csv('olympics.csv')

### 5. how to access the data frame?

In [None]:
# A bare expression on the last line of a cell is displayed by the notebook.
df

### 6. how to get info about the data?

#### *.info()*; *.shape*

In [None]:
# Print a summary of the frame: index range, column names, non-null counts,
# dtypes and memory usage.
df.info()

In [None]:
# shape is an attribute (no parentheses): a (rows, columns) tuple.
df.shape

In [None]:
# Number of rows.
df.shape[0]

In [None]:
# Number of columns.
df.shape[1]

### 7. how to list a few data instances?

#### *.head()*; *.tail()*

In [None]:
# First 5 rows (5 is the default when no count is given).
df.head()

In [None]:
# Last 3 rows.
df.tail(3)

### 8. how to check the data type?

#### *type()*

In [None]:
# The object returned by read_csv is a pandas DataFrame.
type(df)

In [None]:
# A single column taken from a DataFrame is a pandas Series.
type(df.city)

### 9. how to access columns/series using [...]?

In [None]:
# Bracket access works for any column name, including names that contain
# spaces or clash with DataFrame attributes.
df['athlete']

### 10. how to access columns/series using dot notation?

In [None]:
# Attribute access only works when the column name is a valid Python
# identifier and does not clash with an existing DataFrame attribute or method.
df.athlete

### 11. how to access multiple columns?

In [None]:
# Passing a list of names (note the double brackets) selects several columns,
# in the order listed.
df[['city','NOC','athlete']]

In [None]:
# Selecting with a list of column names yields a DataFrame, not a Series.
type(df[['city','sport','athlete']])

### 12. how to obtain the frequency of unique values in a series?

### *.value_counts()*

In [None]:
# Count how often each distinct value occurs; by default the result is sorted
# from most to least frequent and missing values are not counted.
df.athlete.value_counts()

In [None]:
# Keep missing values as their own category (dropna=False) and list the
# least frequent values first (ascending=True).
df.gender.value_counts(dropna=False, ascending=True)

### 13. how to sort a data frame or a series along either axis?

### *.sort_values()*; *axis=0 (rows)*; *axis=1 (columns)*

In [None]:
# Sort the values of the athlete column in ascending order (the default).
# sort_values returns a new Series; df itself is not modified.
athlete = df['athlete'].sort_values()
athlete

In [None]:
# Order rows by NOC and break ties by athlete. The sorted frame is a new
# object that is only displayed here; df keeps its original order.
df.sort_values(by=['NOC', 'athlete'])

### 14. how to use indexing?

In [None]:
# In pandas, the index attribute represents the index (row labels) of a DataFrame 
# or Series. You can access it directly using the index attribute.

### *set_index() to set a DataFrame's index with a specific column.*

In [None]:
# Without inplace=True, set_index returns a new DataFrame indexed by athlete.
# The result is only displayed here, not assigned, so df is left untouched.
df.set_index('athlete')

In [None]:
# Check: the previous set_index call was not assigned and not in place, so df
# still has its original index.
df.head()

In [None]:
# The inplace=True parameter modifies the DataFrame in place instead of creating a
# new one. If inplace=False or not specified (the default), df is left unchanged and
# a new DataFrame with the new index is returned.

In [None]:
# Make the athlete column the index of df itself; with inplace=True the call
# returns None and mutates df.
df.set_index(keys='athlete', inplace=True)

In [None]:
# Check: athlete is now the index of df and no longer a regular column.
df.head()

### *reset_index()*

In [None]:
# Move the current index back into a regular column and give df a fresh
# 0..n-1 integer index, modifying df itself.
df.reset_index(inplace=True)

In [None]:
# Check: athlete is a regular column again and the index is the default
# integer range.
df.head()

### *sort_index()*

In [None]:
# The sort_index() method in pandas is used to sort a DataFrame or a Series by its
# index. By default it returns a new object with the same data but with the index
# sorted; with inplace=True it sorts the existing object instead.

In [None]:
# Index df by athlete (in place) so that sort_index has a meaningful,
# non-default index to sort on.
df.set_index(keys='athlete', inplace=True)

In [None]:
# Check: athlete is the index; rows are still in their original order.
df.head()

In [None]:
# Reorder the rows of df itself by index label, from last to first
# (descending order).
df.sort_index(ascending=False, inplace=True)

In [None]:
# Check: rows are now ordered by the athlete index in descending order.
df.head()

In [None]:
# Restore athlete as a regular column and replace the index of df with a
# fresh 0..n-1 integer range; the current row order is kept.
df.reset_index(inplace=True)

In [None]:
# Check: athlete is a column again; the rows keep the descending order
# produced by sort_index, now numbered from 0.
df.head()

### 14. how to use loc accessor?

### *.loc[...]*

In [None]:
# .loc is used for label-based indexing, allowing you to access a group of rows and 
# columns by specifying labels or boolean arrays. It is primarily used to select data 
# based on the labels of rows or columns.

In [None]:
# .loc is inclusive on both sides, meaning that the start and stop labels specified 
# are included in the selection.

In [None]:
# Boolean-mask selection: the comparison yields one True/False per row and
# .loc keeps only the rows where athlete equals 'MASSON, Paul'.
df.loc[df['athlete'] == 'MASSON, Paul']

In [None]:
# Check: the .loc selection above was only displayed, not assigned, so df
# still contains all rows.
df.head()

### 15. how to use iloc accessor?

### *.iloc[...]*

In [None]:
# The iloc accessor in pandas is used for integer-location based indexing. 
# It allows you to select data from a DataFrame based on the integer indices 
# of rows and columns. 

In [None]:
# A single integer selects the row at position 100 (the 101st row, counting
# from 0) and returns it as a Series.
df.iloc[100]

In [None]:
# A list of positions returns those rows as a DataFrame; an IndexError is
# raised if any position is beyond the last row.
df.iloc[[1245, 2201, 3430, 12000]]

In [None]:
# Positional slice: rows at positions 1 through 6. Unlike .loc, the stop
# position (7) is excluded, as with ordinary Python slicing.
df.iloc[1:7]

### 16. how to use groupby() function?

### *.groupby()*

In [None]:
# The groupby() function in pandas is used for grouping data based on some criteria, 
# and it is followed by an aggregation or transformation operation.  

In [None]:
# The basic idea is to split the data into groups based on a specified key and then 
# apply a function to each group independently.

In [None]:
# Iterating a groupby yields (group key, sub-DataFrame) pairs; list() collects
# one pair per distinct edition so the split step can be inspected.
list(df.groupby('edition'))

In [None]:
# Aggregation with groupby()

In [None]:
# Number of rows in each edition group, returned as a Series indexed by edition.
df.groupby('edition').size()

In [None]:
# min, max, and count

In [None]:
# Group by three keys and apply min, max and count to every remaining column;
# the result has two-level column labels (column name, statistic).
df.groupby(['edition','NOC','medal']).agg(['min','max','count'])

In [None]:
# Number of rows for each (edition, NOC, medal) combination present in the data.
df.groupby(['edition','NOC','medal']).size()

In [None]:
# A dict passed to agg restricts the aggregation to the named column: here
# only min, max and count of edition are computed for each group.
df.groupby(['edition', 'NOC', 'medal']).agg({'edition': ['min', 'max', 'count']})

In [None]:
# Keep only the rows for 'LEWIS, Carl', then summarise his edition values:
# the minimum, the maximum and the number of rows.
(
    df.loc[df['athlete'] == 'LEWIS, Carl']
    .groupby('athlete')
    .agg({'edition': ['min', 'max', 'count']})
)

### *THE END*

In [None]:
%%HTML
<style type="text/css">
/* Enlarge the font used by the notebook's CodeMirror code cells.
   The generic monospace fallback keeps code aligned when Ubuntu Mono
   is not installed on the viewer's machine. */
.CodeMirror {
	font-family: "Ubuntu Mono", monospace;
	font-size: 16pt;
}
</style>

----------------------------------------------------