This notebook walks through a non-exhaustive set of examples drawn from DataCamp practice sessions, along with a few additional examples.

- https://practice.datacamp.com/p/5
- https://practice.datacamp.com/p/16

### Some Pandas Methods/Attributes

- df.head()
- df.tail()
- df.info()
- df.shape

In [None]:
import pandas as pd
import numpy as np

#### Numpy Logical Functions

In [None]:
# Two small integer arrays used by the element-wise comparisons that follow.
x = np.array([9, 5])
y = np.array([16, 12])

# Element-wise OR of the two boolean masks: [False, False] OR [True, False].
np.logical_or(x < 5, y > 15)

In [None]:
# Neither 9 nor 5 is below 5, so both elements of the mask are False.
x < 5

In [None]:
# Only the first element (16) exceeds 15, so the mask is [True, False].
y > 15

In [None]:
# `|` is Python's bitwise OR; on boolean NumPy arrays it is applied
# element-wise, like np.logical_or. The parentheses are required because `|`
# binds more tightly than the comparison operators.
(x < 5)|(y > 15)

#### pandas.DataFrame.to_numpy

In [None]:
# Reference: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_numpy.html
# Signature: DataFrame.to_numpy(dtype=None, copy=False, na_value=<no_default>)

# Column A holds ints and column B holds floats, so to_numpy() has to build a
# new array with a common dtype; writing to that array leaves `df` untouched.
df = pd.DataFrame({"A": [1, 2], "B": [3.0, 4.5]})
arr = df.to_numpy()
arr[0, 1] = 100
print(arr)
display(df)

In [None]:
# Same mixed-dtype frame, this time requesting a copy explicitly.
df = pd.DataFrame({"A": [1, 2], "B": [3.0, 4.5]})
arr = df.to_numpy(copy=True)
arr[0, 1] = 100
print(arr)
display(df)

In [None]:
# Both columns hold integers, so to_numpy() (copy=False by default) may return
# a view of the frame's data instead of a copy; a write through `arr` can then
# show up in `df`.
# NOTE(review): the outcome depends on the pandas version — presumably with
# Copy-on-Write enabled (the default from pandas 3.0) the returned array is
# read-only and the assignment below raises ValueError. Confirm against the
# installed pandas.
df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
arr = df.to_numpy()
arr[0,1] = 100
print (arr)
display (df)

In [None]:
df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
# copy=True states explicitly that an independent array is wanted, so the
# write below cannot reach `df`.
arr = df.to_numpy(copy=True)
arr[0, 1] = 100
print(arr)
display(df)

In [None]:
# Mixed int/float columns: to_numpy() and .values each have to build a fresh
# array with a common dtype, so the two results are expected not to share memory.
df = pd.DataFrame({"A": [1, 2], "B": [3.0, 4.5]})
arr = df.to_numpy()
np.shares_memory(arr, df.values)

In [None]:
# All-integer columns: to_numpy() and .values are both expected to be views of
# the same underlying data, so they share memory.
df = pd.DataFrame({"A": [1, 2], "B": [3, 4]})
arr = df.to_numpy()
np.shares_memory(arr, df.values)

#### .copy()

In [None]:
# ndarray.copy() allocates a new array, so arr1 is independent of arr.
arr1 = arr.copy()
arr1

In [None]:
# Write through the original array only, then show both arrays (one per line)
# so the unchanged copy can be compared with it.
arr[0, 1] = 33
print(arr, arr1, sep="\n")

In [None]:
df = pd.DataFrame(
    {
        "A": [1, 2],
        "B": [3, 4],
    }
)
# deep=True is the default for DataFrame.copy(): the data is duplicated, so
# df1 does not follow later changes made to df.
df1 = df.copy(deep=True)
df1

In [None]:
# Change one cell of the original frame, then show the original followed by
# its copy.
df.iloc[0, 1] = 33
display(df, df1)

#### Pandas filtering and DataFrame creation

In [None]:
# Example dictionary: each key is a column name and each list holds that
# column's values, one entry per row.
my_dict = {
    'course': ['dv', 'ds', 'de', 'de'],
    'level': ['beginner', 'intermediate', 'intermediate', 'advanced'],
    'lesson': [5, 10, 15, 20],
}

In [None]:
# The practice question asks which pandas function turns a dictionary of
# lists into a table: it is the pd.DataFrame constructor.
datacamp = pd.DataFrame(my_dict)
datacamp

In [None]:
# Show the full frame again as a reference point for the filters below.
datacamp

In [None]:
# Boolean mask: keep only the rows whose course is 'ds'.
datacamp[(datacamp['course']=='ds')]

In [None]:
# `|` combines the two masks element-wise: rows whose course is 'ds' OR 'de'.
datacamp[(datacamp['course']=='ds')|(datacamp['course']=='de')]

In [None]:
# `&` binds more tightly than `|`, so this reads as
#     ds OR (de AND lesson >= 15)
# and the 'ds' row is kept even though its lesson count (10) is below 15.
datacamp[(datacamp['course']=='ds')|(datacamp['course']=='de')&(datacamp['lesson']>=15)]

In [None]:
# The extra parentheses force the OR to be evaluated first:
#     (ds OR de) AND lesson >= 15
# so the 'ds' row (lesson 10) is dropped.
datacamp[((datacamp['course']=='ds')|(datacamp['course']=='de'))&(datacamp['lesson']>=15)]

In [None]:
keys = ['a', 'b', 'c']
values = [
    [1, 2, 3],
    [11, 22, 33],
    [10, 20, 30],
]

# zip pairs each key with the list at the same position, yielding
# (key, list) tuples; the comprehension turns those pairs into a dictionary.
zipped = zip(keys, values)
data = {key: column for key, column in zipped}

data

In [None]:
# From a dictionary of lists, each key becomes a column label and each list
# supplies that column's values.
pd.DataFrame(data)

In [None]:
keys = ['a', 'b', 'c']
values = [[1, 2, 3], [11, 22, 33],[10, 20, 30]]

# zip pairs each key with the list at the same position, yielding
# (key, list) tuples; list() collects those tuples so they can be displayed.
list(zip(keys, values))

In [None]:
# From a list of (key, list) tuples, each tuple becomes a ROW: column 0 holds
# the key and column 1 holds the whole list as a single cell.
pd.DataFrame(list(zip(keys, values)))

#### Pandas DataFrame iterrows

- The `iterrows()` method returns an iterator over the rows of the DataFrame, allowing us to loop over the rows one at a time.
- Each iteration yields a tuple containing the row's index label and the row itself (a Pandas Series object).

In [None]:
import pandas as pd

# Column name -> list of values, one entry per person.
data = {
    "firstname": ["Sally", "Mary", "John"],
    "age": [50, 40, 30],
}

df = pd.DataFrame(data=data)
df

In [None]:
# Each item from iterrows() is unpacked into the index label and the row
# (a Series); both are printed for every row.
for idx, record in df.iterrows():
    print(f'This is the index: {idx}')
    print(f'This is the row: {record}')

In [None]:
# After unpacking, the row Series can be indexed by column name.
for idx, person in df.iterrows():
    print(person['firstname'])

In [None]:
# iterrows() yields (index, Series) tuples. Without unpacking, `row` is the
# whole tuple, and indexing a tuple with a string raises TypeError.
# The error is caught and printed so the demonstration is still visible but
# "Restart Kernel -> Run All" can continue past this cell.
try:
    for row in df.iterrows():
        print(row['firstname'])
except TypeError as err:
    print(f'TypeError: {err}')

#### Use of loc and iloc

In [None]:
# Column-oriented data for the five BRICS countries: each key is a column
# name and each list holds that column's values, one entry per country.
# The variable is deliberately NOT called `dict`: rebinding that name shadows
# the builtin and makes any later dict(...) call in the same kernel fail with
# "TypeError: 'dict' object is not callable".
brics_data = {
    "country": ["Brazil", "Russia", "India", "China", "South Africa"],
    "capital": ["Brasilia", "Moscow", "New Delhi", "Beijing", "Pretoria"],
    "area": [8.516, 17.10, 3.286, 9.597, 1.221],
    "population": [200.4, 143.5, 1252, 1357, 52.98],
}

brics = pd.DataFrame(brics_data)
brics

In [None]:
# .values returns the frame's data as a NumPy array; the column labels and the
# index are not part of the result.
brics.values

In [None]:
# .shape is an attribute (no parentheses): (number of rows, number of columns).
brics.shape

In [None]:
# A single column label returns that column as a Series.
brics['country']

In [None]:
# A list of labels returns a DataFrame, even when the list has one entry.
brics[['country']]

In [None]:
# Several labels in the list select several columns, in the order given.
brics[['country', 'area']]

In [None]:
# .loc is label-based: all rows (`:`), the 'country' column, as a Series.
brics.loc[:, 'country']

In [None]:
# Wrapping the column label in a list keeps the result a DataFrame.
brics.loc[:, ['country']]

In [None]:
# Lists on both axes: the rows labelled 0 and 2, columns 'country' and 'area'.
brics.loc[[0,2], ['country', 'area']]

In [None]:
# A slice such as 0:2 is only valid directly inside the indexing brackets;
# placing it inside a list literal ([0:2]) is a SyntaxError.
# A SyntaxError is raised when the cell is parsed, before any line runs, so it
# cannot be caught with try/except around the expression itself. Compiling the
# expression from a string shows the same error while keeping this cell
# runnable under "Restart Kernel -> Run All".
invalid_indexer = "brics.loc[[0:2], ['country', 'area']]"
try:
    compile(invalid_indexer, "<slice-in-list>", "eval")
except SyntaxError as err:
    print(f"SyntaxError: {err.msg}")

In [None]:
# The slice goes directly in the indexer (not inside a list). .loc slices are
# label-based and include the end label, so rows 0, 1 and 2 are returned.
brics.loc[0:2, ['country', 'area']]

In [None]:
# .iloc is position-based: all rows, the first column, as a Series.
brics.iloc[:, 0]

In [None]:
# A list of positions keeps the result a DataFrame.
brics.iloc[:, [0]]

In [None]:
# Position slices exclude the end position: rows at positions 1 and 2,
# columns at positions 0 and 1.
brics.iloc[1:3, 0:2]

#### Quantiles

https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.quantile.html

In [None]:
# Passing a list of levels returns a Series indexed by those levels.
brics['area'].quantile([0.1,0.5,0.9]) # the 10th, 50th (median) and 90th percentiles

#### Reindex

In [None]:
# No index is supplied, so the rows get the default 0..3 integer index.
data = {
    "age": [50, 40, 30, 40],
    "qualified": [True, False, False, False],
}

df = pd.DataFrame(data=data)
df

- https://www.w3schools.com/python/pandas/ref_df_reindex.asp
- https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.reindex.html

In [None]:
import pandas as pd

data = {
    "age": [50, 40, 30, 40],
    "qualified": [True, False, False, False],
}

# One label per row; these replace the default integer index.
idx = ["Sally", "Mary", "John", "Monica"]

df = pd.DataFrame(data=data, index=idx)
df

In [None]:
# reindex() returns a NEW DataFrame aligned to the given labels; the original
# `df` is not modified. "John" and "Monica" exist in the old index and keep
# their data; "Robert" and "Cindy" do not, so their rows are filled with NaN.
newidx = ["Robert", "Cindy", "John", "Monica"]
df.reindex(newidx)

In [None]:
# Displaying `df` again confirms that reindex() did not change the original:
# it still has the Sally / Mary / John / Monica index.
df

In [None]:
# Note: labels in the new index that are absent from the old index get rows
# filled with NaN; labels present in both keep their original values.

newidx = ["Robert", "Cindy", "John", "Monica"]

# Assign the result to keep it, because reindex() does not modify `df`.
newdf = df.reindex(newidx)
newdf

In [None]:
newidx = ["Robert", "Cindy", "John", "Monica"]

# fill_value replaces the default NaN for labels that were not in the old
# index. Because 'missing' is a string, the affected columns end up holding a
# mix of strings and the original values.
newdf = df.reindex(newidx, fill_value='missing')
newdf