In [66]:
import pandas as pd
import numpy as np

In [None]:
example_series = pd.Series([1,[2,3],4,8,16])

example_series.explode()

0     1
1     2
1     3
2     4
3     8
4    16
dtype: object

In [93]:
data = {"state": ["Ohio", "Ohio", "Ohio", "Nevada", "Nevada", "Nevada"],
        "year": [2000, 2001, 2002, 2001, 2002, 2003],
        "pop": [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}


nu_index = pd.Index(["a", "b", "c", "d", "e", "f"])
frame = pd.DataFrame(data, index=nu_index)

frame.index = np.arange(6)

frame

test1 = np.arange(5)
test2 = np.arange(3)
np.concatenate([test1, test2])

array([0, 1, 2, 3, 4, 0, 1, 2])

In [94]:
labels = pd.Index(np.arange(3))

obj2 = pd.Series([1.5, -2.5, 0], index=labels)
obj3 = pd.Series([1.5, -2.5, 0], np.arange(3))

frame.index


Index([0, 1, 2, 3, 4, 5], dtype='int64')

In [499]:
# construction of a basic dataframe from list
 
rnd_gen = np.random.default_rng(1)

toy_dict = {
    "name": ["paul", "pablo", "paulo", "paulie"],
    "age": [36, 21, 60, 25],
    "random": rnd_gen.integers(0, 10, 4)
}

df = pd.DataFrame(toy_dict, index=["now", "young", "old", "youngish"])

df["super"] = 1

df




Unnamed: 0,name,age,random,super
now,paul,36,4,1
young,pablo,21,5,1
old,paulo,60,7,1
youngish,paulie,25,9,1


In [177]:
# construction of dataframe from nested list
rnd_gen = np.random.default_rng(3)

nested_dict = {
    "paul": ["beard", 36, rnd_gen.integers(0, 10, 1)[0]],
    "pablo": ["barba", 21, rnd_gen.integers(0, 10, 1)[0]]
}

nested_df = pd.DataFrame(nested_dict, index=["surname", "age", "random"])

nested_df.reindex(
    columns = ["pablo", "paul"], 
    index = ["age", "surname", "random", "secret"], 
    fill_value= "D'oh"
    )

nested_df.reindex(
    columns = ["pablo", "paul"], 
    index = np.setdiff1d(nested_df.index, "age"), 
    fill_value= "D'oh"
    )

nested_df.drop(
    labels = ["pablo"], 
    axis = 1
    )

Unnamed: 0,paul
surname,beard
age,36
random,8


In [194]:
obj = pd.Series(np.arange(4.), index=["a", "b", "c", "d"])

obj[1:3] = 888
obj[["a", "b"]] = 999
obj.loc[["a", "b"]] = -1

obj

a     -1.0
b     -1.0
c    888.0
d      3.0
dtype: float64

In [219]:
obj1 = pd.Series([1, 2, 3], index=[2, 0, 1])
obj2 = pd.Series([1, 2, 3], index=["a", "b", "c"])

obj1.iloc[[1, 2]] # location based on position
obj1.loc[[1, 2]] # location based on index label matchign

t123 = obj2.loc["a":"c"] # location based on label (using slice feature instead of fancy indexing)
t123["b"] = 9
obj2

a    1
b    9
c    3
dtype: int64

## Selecting and filtering dataframes...

In [350]:
# 1. just using df[]

# boolean lists act as row filters
df[df["age"] > 30]
df[[True, True, True, False]]

# slice acts as row filters
df[:2]

# character lists act as column filters! 
df[["name", "age", "random"]]

# .loc is the main way to use filter() and select() in Python, with filter() being the first arg and select() being the second
df.iloc[1:, :][df["age"] < 35]
young_men = df.loc[df["age"] < 35, ["age", "name"]]

# 2. using df.loc[] or df.iloc[]

df.loc["now", "name"] # df.loc[rows, cols]

df.iloc[[0, -1], [0, 1] ] # df.iloc[rows, cols]


# 3. using df.at[] or df.iat[] (like loc/iloc but works for single values only )

df.at["now", "age"]

df.iat[0, 1]


# chaining can lead to trouble, the best practice is to use loc[] and iloc[] in a single go...

df.loc[
    (df["name"].str.endswith("o"))  & (df["age"] > 0) , #  note the use of explicit tupples... this is because & is evaluated BEFORE >
    ["age", "name"]
    ]

  df.iloc[1:, :][df["age"] < 35]


Unnamed: 0,age,name
young,21,pablo
old,60,paulo


In [384]:
# arithmetic operations will automatically align by their indices labels. the empty slots can be filled various ways

rnd_gen2 = np.random.default_rng(2)

no_set_1 = pd.DataFrame(np.arange(12).reshape(3, 4), index=["a", "b", "c"], columns=["a", "b", "c", "d"])
no_set_2 = pd.DataFrame(rnd_gen2.poisson(3, 12).reshape(3, 4), index=["b", "c", "d"], columns=["a", "b", "c", "e"])

no_set_1.pow(no_set_2, fill_value=-1)
no_set_1.rpow(no_set_2, fill_value=-1)
no_set_2.pow(no_set_1, fill_value=-1)

Unnamed: 0,a,b,c,d,e
a,1.0,-1.0,1.0,-1.0,
b,81.0,243.0,729.0,-1.0,0.166667
c,256.0,10077696.0,1.0,-1.0,0.166667
d,0.142857,0.5,inf,,0.142857


In [None]:


list1.reverse()




In [None]:
# dataframe combined with series

no_set_3 = no_set_1.iloc[0, :]
no_set_4 = no_set_1.iloc[:, 0]

no_set_5 = pd.concat((no_set_3, no_set_4))

letters_a_g = ["a", "b", "c", "d", "e", "f", "g"]
letters_a_g.reverse()
no_set_5.index = letters_a_g

no_set_1.add(no_set_3)
no_set_1.add(no_set_4, axis=0)
no_set_1.add(no_set_5, axis = 0)


Unnamed: 0,a,b,c,d
a,8.0,9.0,10.0,11.0
b,8.0,9.0,10.0,11.0
c,8.0,9.0,10.0,11.0
d,,,,
e,,,,
f,,,,
g,,,,


In [None]:
# Function Apply / Map

no_set_1.apply(lambda x: np.median(x), axis = 0, result_type = "reduce")

drnd_gen = np.random.default_rng(1)

toy_dict = {
    "name": ["paul", "pablo", "paulo", "paulie"],
    "age": [36, 21, 60, 25],
    "random": rnd_gen.integers(0, 10, 4)
}

df = pd.DataFrame(toy_dict, index=["now", "young", "old", "youngish"])

df["super"] = 1

df["name"] = df["name"].str.capitalize()

df.sort_index(axis = 0, ascending=True)
df = df.assign(extra = [1, 1, 2, 2])
df.sort_values(["age", "extra", "age"], inplace=True)

df.index.is_boolean()


  df.index.is_boolean()


<function pandas.core.algorithms.unique(values)>

In [557]:
# summary functions#

names = df["name"]

names.sort_values()
df.loc[:, "extra"].value_counts()


extra
1    2
2    2
Name: count, dtype: int64