In [None]:
import numpy as np
import pandas as pd

In [None]:
# Six timestamps starting at 2013-01-01 (date_range's default step).
dates = pd.date_range(start="20130101", periods=6)
# `dates` is used as the row index in place of the default integer index,
# so rows can be selected by date label.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=["A", "B", "C", "D"],
)

In [None]:
# Keep the first four dates and append a new, initially empty column "E".
df1 = df.reindex(index=dates[:4], columns=[*df.columns, "E"])
# .loc label slices include both endpoints, so this sets "E" for dates[0] and dates[1].
df1.loc[dates[0]:dates[1], "E"] = 1
# Column "F" is a 1-based row counter.
df1["F"] = list(range(1, len(df1) + 1))
df1

In [None]:
# Return a copy of df1 without the rows that contain at least one missing value.
df1.dropna(axis=0, how="any")

In [None]:
# Return a copy of df1 with every missing value replaced by 3.3.
df1.fillna(3.3)

In [None]:
# Boolean mask with True wherever df1 holds a missing value.
df1.isna()

In [None]:
# Mean down the rows: one value per column.
df1.mean(axis=0)

In [None]:
# Mean across the columns: one value per row.
df1.mean(axis=1)

In [None]:
# Series aligned on `dates`, with its values moved two positions down the index
# (the first two slots become NaN).
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8], index=dates).shift(periods=2)
s

In [None]:
# Display the current contents of df1.
df1

In [None]:
# Subtract s from every column of df, matching on the row labels:
# s and df share the same index, so the operation is aligned along the index axis.
df.sub(s, axis=0)

In [None]:
# Apply np.cumsum to df1 through DataFrame.apply.
df1.apply(np.cumsum)
# The function is passed by name, without call parentheses.

In [None]:
# Rebind `s` to ten random integers drawn from 1..6 (the upper bound 7 is exclusive).
s = pd.Series(np.random.randint(low=1, high=7, size=10))
s

In [None]:
# Count how many times each distinct value occurs in s.
s.value_counts()

In [None]:
# Concat

# Rebind `df` to a fresh 10x4 frame of random values with the default integer index.
df = pd.DataFrame(data=np.random.randn(10, 4))

# Break it into three consecutive row chunks (3 + 4 + 3 rows).
pieces = [df.iloc[:3], df.iloc[3:7], df.iloc[7:]]
pieces[1]

In [None]:
# Stack the chunks back together row-wise.
pd.concat(pieces, axis=0)

In [None]:
# Join
# merge() enables SQL style join types along specific columns. See the Database style joining section.

In [None]:
# The key "foo" appears twice on the left and once on the right.
left = pd.DataFrame(
    data={
        "key": ["foo", "foo"],
        "lval": [1, 2],
    }
)
right = pd.DataFrame(
    data={
        "key": ["foo"],
        "rval": [4],
    }
)

In [None]:
# Show both inputs, then join them on the shared "key" column.
print(left, "\n", right)
left.merge(right, on="key")

In [None]:
# Both frames carry the same two keys, "foo" and "bar".
left = pd.DataFrame(data={"key": ["foo", "bar"], "lval": [1, 2]})
right = pd.DataFrame(data={"key": ["foo", "bar"], "rval": [4, 5]})

print(left, "\n", right)
left.merge(right, on="key")

In [None]:
# Only "foo" is present on both sides; "bar" is left-only and "baz" is right-only.
left = pd.DataFrame(data={"key": ["foo", "bar"], "lval": [1, 2]})
right = pd.DataFrame(data={"key": ["foo", "baz"], "rval": [4, 5]})

print(left, "\n", right)
# Inner join: keep only the keys found in both frames.
left.merge(right, how="inner", on="key")

In [None]:
# Left join: keep every key from `left`.
left.merge(right, how="left", on="key")

In [None]:
# Outer join: keep the keys from both frames.
left.merge(right, how="outer", on="key")

In [None]:
# Two frames with no column in common.
df1 = pd.DataFrame(data={"left": ["foo", "bar"]})
df2 = pd.DataFrame(data={"right": [7, 8]})
print(df1, "\n", df2)
# Cross join: pair every row of df1 with every row of df2.
df1.merge(df2, how="cross")

In [None]:
from pandas import merge_ordered

# Left input: the keys a/c/e repeated once for group "a" and once for group "b".
df1 = pd.DataFrame(
    data={
        "key": ["a", "c", "e"] * 2,
        "lvalue": [1, 2, 3] * 2,
        "group": ["a"] * 3 + ["b"] * 3,
    }
)
df1

In [None]:
# Right input: keys b/c/d with one value each.
df2 = pd.DataFrame(
    data={
        "key": ["b", "c", "d"],
        "rvalue": [1, 2, 3],
    }
)
df2

In [None]:
# Ordered merge performed separately for each "group" of the left frame,
# forward-filling the gaps that the merge leaves behind.
pd.merge_ordered(df1, df2, left_by="group", fill_method="ffill")

In [None]:
# Ordered Merge (chatGPT)
import pandas as pd

# First frame: value1 observed on 1, 3 and 5 January 2022.
df1 = pd.DataFrame(
    {
        "date": pd.to_datetime(["2022-01-01", "2022-01-03", "2022-01-05"]),
        "value1": [1, 2, 3],
    }
)
df1

In [None]:
# Second frame: value2 observed on 2, 4 and 6 January 2022.
df2 = pd.DataFrame(
    {
        "date": pd.to_datetime(["2022-01-02", "2022-01-04", "2022-01-06"]),
        "value2": [4, 5, 6],
    }
)
df2

In [None]:
# Ordered merge of the two frames. No `on` is given, so pandas picks the join
# column(s) itself (intended here: the 'date' column).
merged_df = pd.merge_ordered(left=df1, right=df2)

print(merged_df)

In [None]:
# merge_asof

# Left frame: one row per time step 1..5.
df1 = pd.DataFrame({
    'time': [1, 2, 3, 4, 5],
    'value1': ['A', 'B', 'C', 'D', 'E']
})

# Right frame: values available only at times 1, 3 and 5.
df2 = pd.DataFrame({
    'time': [1, 3, 5],
    'value2': [10, 30, 50]
})

print(df1, '\n', df2)

# Default direction ("backward"): each left row takes the last right row whose
# time is <= its own. Stored under its own name so the next merge does not
# overwrite it before it is shown.
merged_backward = pd.merge_asof(df1, df2, on='time', allow_exact_matches=True)
print(merged_backward)

# direction='forward': each left row takes the first right row whose time is >= its own.
merged_df = pd.merge_asof(df1, df2, on='time', allow_exact_matches=True, direction='forward')
merged_df


In [None]:
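# direction defaults to "backward".
# allow_exact_matches defaults to True. With allow_exact_matches=False a right-hand row
# whose key equals the left key is not eligible (the comparison becomes strict: < or >),
# so the left row falls back to the next-nearest key, or gets NaN when there is none.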


In [None]:
# Grouping

# Two label columns (A, B) to group by and two random numeric columns (C, D) to aggregate.
df = pd.DataFrame(
    data={
        "A": "foo bar foo bar foo bar foo foo".split(),
        "B": "one one two three two two one three".split(),
        "C": np.random.randn(8),
        "D": np.random.randn(8),
    }
)
df

In [None]:
# Sum the numeric columns C and D within each value of A.
df.groupby(by="A")[["C", "D"]].sum()

In [None]:
# Sum per (A, B) pair, then order the rows so that B == "one" comes first.
# sort_index hands the whole level to `key` as an Index (not one label at a time),
# so a scalar `if` on it raises "truth value of an array is ambiguous".
# The ranking is therefore applied element-wise with Index.map.
(
    df.groupby(["A", "B"])
    .sum()
    .sort_index(
        level=["B"],
        key=lambda labels: labels.map(lambda label: 1 if label == "one" else 2),
    )
)

In [None]:
# Grouping

# Two label columns (A, B) to group by and two random numeric columns (C, D).
# The last B label is "three"; the earlier spelling "tree" was a typo that
# produced a spurious extra group.
df = pd.DataFrame(
    {
        "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
        "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
        "C": np.random.randn(8),
        "D": np.random.randn(8),
    }
)
df

In [None]:
# Sum the numeric columns C and D within each value of A.
df.groupby(by="A")[["C", "D"]].sum()

In [None]:
# Median of the remaining columns for every (A, B) combination.
df.groupby(by=["A", "B"]).median()

In [None]:
# Reshaping
# stack(), unstack, melt()

# Pair the first-level labels with the second-level labels position by position.
tuples = [
    (first, second)
    for first, second in zip(
        ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
        ["one", "two", "one", "two", "one", "two", "one", "two"],
    )
]
tuples

In [None]:
# Stack
import pandas as pd
import numpy as np

# Two-level row index ("first", "second") built from `tuples`.
index = pd.MultiIndex.from_tuples(tuples, names=["first", "second"])
# 8x2 frame of random values on that index.
multiIndex_df = pd.DataFrame(
    data=np.random.randn(8, 2),
    index=index,
    columns=list("AB"),
)
multiIndex_df

In [None]:
# Call stack() on the multi-indexed frame and keep the result for the next cell.
stacked = multiIndex_df.stack()

In [None]:
# Pivot the innermost index level (the default, level=-1) back into columns.
stacked.unstack(level=-1)

In [None]:
# Pivot tables

# Twelve rows: three label columns (A, B, C) built from repeating patterns,
# plus two random numeric columns (D, E).
pivot_df = pd.DataFrame(
    data={
        "A": ["one", "one", "two", "three"] * 3,
        "B": list("ABC") * 4,
        "C": (["foo"] * 3 + ["bar"] * 3) * 2,
        "D": np.random.randn(12),
        "E": np.random.randn(12),
    }
)
pivot_df

In [None]:
# Rows keyed by (A, B), one column per value of C, cells aggregated from D
# with pivot_table's default aggregation.
pivot_df.pivot_table(index=["A", "B"], columns=["C"], values="D")