In [5]:
import pandas as pd

# Column-oriented sample records: every key becomes a DataFrame column and
# every list holds that column's values, one per row.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Age=[25, 30, 35, 40],
    City=['New York', 'Los Angeles', 'Chicago', 'Houston'],
)

df = pd.DataFrame(data=data)

# Indexing with a single column label returns that column as a Series.
print(df['Name'])

0      Alice
1        Bob
2    Charlie
3      David
Name: Name, dtype: object


In [7]:
# Add a 'Country' column from an explicit four-element list (one value per
# row; the list length must match the number of rows).
df['Country'] = ['USA'] * 4
print(df)

      Name  Age         City Country
0    Alice   25     New York     USA
1      Bob   30  Los Angeles     USA
2  Charlie   35      Chicago     USA
3    David   40      Houston     USA


In [8]:
# Assigning to a row label that is not in the index yet (4) adds a new row;
# the list supplies one value per column, in column order.
df.loc[4] = ['Eve', 28, 'San Francisco', 'USA']
print(df)

      Name  Age           City Country
0    Alice   25       New York     USA
1      Bob   30    Los Angeles     USA
2  Charlie   35        Chicago     USA
3    David   40        Houston     USA
4      Eve   28  San Francisco     USA


In [9]:
# Overwrite a single cell, addressed by row label 1 and column label 'Age'.
df.loc[1, 'Age'] = 31
print(df)

      Name  Age           City Country
0    Alice   25       New York     USA
1      Bob   31    Los Angeles     USA
2  Charlie   35        Chicago     USA
3    David   40        Houston     USA
4      Eve   28  San Francisco     USA


In [10]:
# Convert the 'Age' column to 64-bit floats, then list every column's dtype
# to confirm the conversion.
df['Age'] = df['Age'].astype('float64')
print(df.dtypes)

Name        object
Age        float64
City        object
Country     object
dtype: object


In [12]:
# Boolean-mask filtering: keep only the rows whose Age is at least 30.
df_filtered = df[df['Age'].ge(30)]
print(df_filtered)

      Name   Age         City Country
1      Bob  31.0  Los Angeles     USA
2  Charlie  35.0      Chicago     USA
3    David  40.0      Houston     USA


In [13]:
# Combine two element-wise conditions with `&`: Age of at least 30 AND the
# city equal to 'Chicago'.
df_filtered = df[df['Age'].ge(30) & df['City'].eq('Chicago')]
print(df_filtered)

      Name   Age     City Country
2  Charlie  35.0  Chicago     USA


In [14]:
# Two parallel label lists: the first is the outer index level, the second
# the inner level. Position i of each list labels row i.
arrays = [
    ['Group1', 'Group1', 'Group2', 'Group2'],
    ['Subgroup1', 'Subgroup2', 'Subgroup1', 'Subgroup2'],
]
index = pd.MultiIndex.from_arrays(arrays=arrays, names=('Group', 'Subgroup'))

# A single 'Value' column laid over the two-level index.
data = dict(Value=[10, 20, 30, 40])
df_multi = pd.DataFrame(data=data, index=index)
print(df_multi)

                  Value
Group  Subgroup        
Group1 Subgroup1     10
       Subgroup2     20
Group2 Subgroup1     30
       Subgroup2     40


In [15]:
# Select every row under the outer-level label 'Group1'; the result is
# indexed by the remaining 'Subgroup' level only.
print(df_multi.loc['Group1'])

           Value
Subgroup        
Subgroup1     10
Subgroup2     20


In [20]:
# Show the current frame, then read one value by row label (1) and column
# label ('Name').
print(df)
print(df.loc[1, 'Name'])

      Name   Age           City Country
0    Alice  25.0       New York     USA
1      Bob  31.0    Los Angeles     USA
2  Charlie  35.0        Chicago     USA
3    David  40.0        Houston     USA
4      Eve  28.0  San Francisco     USA
Bob


In [21]:
# Read one value by integer position: second row, first column.
print(df.iloc[1, 0])

Bob


In [22]:
# Label-based scalar accessor: row label 1, column label 'Name'.
print(df.at[1, 'Name'])

Bob


In [23]:
# Position-based scalar accessor: second row, first column.
print(df.iat[1, 0])

Bob


In [24]:
# Sample with one repeated record: rows 0 and 2 are both ('Alice', 25).
data = dict(
    Name=['Alice', 'Bob', 'Alice', 'David'],
    Age=[25, 30, 25, 40],
)

df = pd.DataFrame(data=data)

# Check for duplicate rows: True marks a row that repeats an earlier one.
print(df.duplicated())

0    False
1    False
2     True
3    False
dtype: bool


In [25]:
# Drop repeated rows, keeping the first occurrence of each (the default
# policy, spelled out here). The original row labels are preserved.
df_no_duplicates = df.drop_duplicates(keep='first')
print(df_no_duplicates)

    Name  Age
0  Alice   25
1    Bob   30
3  David   40


In [26]:
# Order the rows by 'Age', smallest first (ascending is the default and is
# made explicit here). A new frame is returned; `df` is left as it was.
df_sorted = df.sort_values(by='Age', ascending=True)
print(df_sorted)

    Name  Age
0  Alice   25
2  Alice   25
1    Bob   30
3  David   40


In [27]:
# Order the rows by 'Age', largest first.
df_sorted_desc = df.sort_values('Age', ascending=False)
print(df_sorted_desc)

    Name  Age
3  David   40
1    Bob   30
0  Alice   25
2  Alice   25


In [28]:
# Order the rows by their index labels, ascending (defaults made explicit).
df_sorted_index = df.sort_index(axis=0, ascending=True)
print(df_sorted_index)

    Name  Age
0  Alice   25
1    Bob   30
2  Alice   25
3  David   40


In [29]:
import pandas as pd

# One exam score per student; each key becomes a column.
data = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Score=[85, 90, 78, 92],
)

df = pd.DataFrame(data=data)


In [31]:
# Derive a new column by calling a function on every 'Score' value; here the
# function adds a flat 10 points.
# NOTE(review): for plain arithmetic like this, `df['Score'] + 10` gives the
# same values without a Python-level call per element.
df['AdjustedScore'] = df['Score'].apply(lambda score: score + 10)
print(df)

      Name  Score  AdjustedScore
0    Alice     85             95
1      Bob     90            100
2  Charlie     78             88
3    David     92            102


In [33]:
# Categorical labels to encode as integers.
gender_data = pd.Series(['Male', 'Female', 'Female', 'Male'])

# Element-wise mapping: 'Male' becomes 0, every other value becomes 1.
gender_numeric = gender_data.map(lambda label: int(label != 'Male'))
print(gender_numeric)

0    0
1    1
2    1
3    0
dtype: int64


In [35]:
# Two tables sharing the 'CustomerID' key; IDs 2 and 3 appear in both.
df1 = pd.DataFrame({
    'CustomerID': [1, 2, 3],
    'Product': ['Laptop', 'Mouse', 'Keyboard'],
})
df2 = pd.DataFrame({
    'CustomerID': [2, 3, 4],
    'Location': ['USA', 'Canada', 'Mexico'],
})

# Inner join: only keys present in both tables survive.
df_merged = pd.merge(left=df1, right=df2, how='inner', on='CustomerID')
print(df_merged)

   CustomerID   Product Location
0           2     Mouse      USA
1           3  Keyboard   Canada


In [38]:
# Two frames with identical columns.
df1 = pd.DataFrame(dict(A=['A0', 'A1'], B=['B0', 'B1']))
df2 = pd.DataFrame(dict(A=['A2', 'A3'], B=['B2', 'B3']))

# Stack the frames vertically. Each frame keeps its own row labels, so the
# result's index repeats 0, 1.
df_concat = pd.concat(objs=[df1, df2], axis=0)

# Show both inputs followed by the combined frame, one per line block.
print(df1, df2, df_concat, sep='\n')

    A   B
0  A0  B0
1  A1  B1
    A   B
0  A2  B2
1  A3  B3
    A   B
0  A0  B0
1  A1  B1
0  A2  B2
1  A3  B3


In [41]:
# Two single-column frames that share the same row labels.
shared_labels = ['K0', 'K1']
df1 = pd.DataFrame(dict(A=['A0', 'A1']), index=shared_labels)
df2 = pd.DataFrame(dict(B=['B0', 'B1']), index=shared_labels)

# Index-aligned join; 'left' is the default strategy, spelled out here.
df_join = df1.join(df2, how='left')

# Show both inputs followed by the joined frame.
print(df1, df2, df_join, sep='\n')

     A
K0  A0
K1  A1
     B
K0  B0
K1  B1
     A   B
K0  A0  B0
K1  A1  B1


In [44]:
# Long-format readings: one row per (date, city) observation.
data = dict(
    Date=['2024-01-01', '2024-01-01', '2024-01-02'],
    City=['New York', 'Los Angeles', 'New York'],
    Temperature=[55, 68, 52],
)

df_weather = pd.DataFrame(data=data)

# Reshape long -> wide: dates become rows, cities become columns. A
# (date, city) pair with no reading ends up as NaN.
df_pivot = df_weather.pivot(index='Date', columns='City', values='Temperature')

# Pivoted table, a separator, then the untouched source frame.
print(df_pivot, "---------", df_weather, sep='\n')

City        Los Angeles  New York
Date                             
2024-01-01         68.0      55.0
2024-01-02          NaN      52.0
---------
         Date         City  Temperature
0  2024-01-01     New York           55
1  2024-01-01  Los Angeles           68
2  2024-01-02     New York           52


In [45]:
# Sales records; each (Region, Product) pair occurs twice, so the values
# must be aggregated when pivoting.
sales_data = dict(
    Product=['A', 'B', 'A', 'B'],
    Sales=[100, 150, 200, 250],
    Region=['North', 'South', 'North', 'South'],
)

df_sales = pd.DataFrame(data=sales_data)

# Regions as rows, products as columns, cells hold the summed sales.
df_pivot_table = pd.pivot_table(
    df_sales,
    values='Sales',
    index='Region',
    columns='Product',
    aggfunc='sum',
)
print(df_pivot_table)


Product      A      B
Region               
North    300.0    NaN
South      NaN  400.0


In [49]:
# Wide table: one row per ID, one column per subject.
df_wide = pd.DataFrame({
    'ID': [1, 2, 3],
    'Math': [85, 90, 88],
    'English': [78, 85, 90]
})

# Reshape wide -> long: 'ID' is kept as the identifier, the subject column
# names move into 'Subject' and their values into 'Score'.
# (The call used to be written twice in a row with identical arguments; the
# first result was immediately overwritten, so only one call is kept.)
df_melt = pd.melt(df_wide, id_vars=['ID'], value_vars=['Math', 'English'], var_name='Subject', value_name='Score')

# Long result, a separator, then the unchanged wide input.
print(df_melt)
print("---------")
print(df_wide)

   ID  Subject  Score
0   1     Math     85
1   2     Math     90
2   3     Math     88
3   1  English     78
4   2  English     85
5   3  English     90
---------
   ID  Math  English
0   1    85       78
1   2    90       85
2   3    88       90


In [57]:
# Wide table: one row per ID, one column per subject.
df_wide = pd.DataFrame(dict(
    ID=[1, 2, 3],
    Math=[85, 90, 88],
    English=[78, 85, 90],
))

# Unpivot the subject columns into (Subject, Score) pairs, keeping 'ID' as
# the identifier on every resulting row.
df_melt = df_wide.melt(
    id_vars=['ID'],
    value_vars=['Math', 'English'],
    var_name='Subject',
    value_name='Score',
)
print(df_melt)

   ID  Subject  Score
0   1     Math     85
1   2     Math     90
2   3     Math     88
3   1  English     78
4   2  English     85
5   3  English     90


In [59]:
# Salary records, two per department.
data = dict(
    Department=['HR', 'IT', 'HR', 'Finance', 'IT', 'Finance'],
    Salary=[50000, 60000, 52000, 80000, 65000, 90000],
)
df = pd.DataFrame(data=data)

# Average salary per department; the result is a Series indexed by
# department name.
groups = df.groupby(by='Department')['Salary'].mean()
print(groups)

Department
Finance    85000.0
HR         51000.0
IT         62500.0
Name: Salary, dtype: float64


In [61]:
# Salary records tagged with both a department and an office location.
data = dict(
    Department=['HR', 'IT', 'HR', 'Finance', 'IT', 'Finance'],
    Location=['New York', 'New York', 'Chicago', 'Chicago', 'New York', 'Chicago'],
    Salary=[50000, 60000, 52000, 80000, 65000, 90000],
)
df = pd.DataFrame(data=data)

# Average salary for every (Department, Location) combination that occurs;
# the result is a Series with a two-level index.
grouped = df.groupby(by=['Department', 'Location'])['Salary'].mean()
print(grouped)

Department  Location
Finance     Chicago     85000.0
HR          Chicago     52000.0
            New York    50000.0
IT          New York    62500.0
Name: Salary, dtype: float64


In [63]:
# Several statistics of 'Salary' per department in one pass. Selecting the
# column as a one-column frame keeps 'Salary' as the outer column level, with
# the statistic names underneath it.
aggregated = df.groupby('Department')[['Salary']].agg(['sum', 'mean', 'count'])
print(aggregated)

            Salary               
               sum     mean count
Department                       
Finance     170000  85000.0     2
HR          102000  51000.0     2
IT          125000  62500.0     2


In [65]:
# Average salary per (Department, Location), then move the innermost index
# level (Location) into the columns. Combinations that never occur show up
# as NaN.
unstacked = (
    df.groupby(['Department', 'Location'])['Salary']
    .mean()
    .unstack(level=-1)
)
print(unstacked)

Location    Chicago  New York
Department                   
Finance     85000.0       NaN
HR          52000.0   50000.0
IT              NaN   62500.0


In [66]:
# Reverse of the unstack step: fold the innermost column level back into the
# row index, producing a Series with a two-level index again.
stacked = unstacked.stack(level=-1)
print(stacked)

Department  Location
Finance     Chicago     85000.0
HR          Chicago     52000.0
            New York    50000.0
IT          New York    62500.0
dtype: float64


In [67]:
# Average salary per department, with the group key turned back into an
# ordinary 'Department' column and a fresh 0..n-1 row index.
df_reset = (
    df.groupby('Department')['Salary']
    .mean()
    .reset_index(drop=False)
)
print(df_reset)

  Department   Salary
0    Finance  85000.0
1         HR  51000.0
2         IT  62500.0


In [77]:
# Monthly sales keyed by English three-letter month abbreviations.
data = {'Month': ['Jan', 'Feb', 'Mar', 'Apr', 'May'],
        'Sales': [200, 300, 400, 500, 450]}
df = pd.DataFrame(data)

# Build strings such as "Jan2024" and parse them with an explicit format
# ("%b" = abbreviated month name, "%Y" = four-digit year). Without `format`
# pandas has to guess the layout element by element and emits a
# "Could not infer format" warning. No day is given, so every timestamp
# falls on the first day of its month.
# NOTE: "%b" matches month names of the active locale; the data is English.
df['Date'] = pd.to_datetime(df['Month'] + "2024", format='%b%Y')
print(df)


  Month  Sales       Date
0   Jan    200 2024-01-01
1   Feb    300 2024-02-01
2   Mar    400 2024-03-01
3   Apr    500 2024-04-01
4   May    450 2024-05-01


  df['Date'] = pd.to_datetime(df['Month'] + "2024")


In [78]:
# Index by 'Date' on a temporary copy instead of mutating `df` in place:
# with `inplace=True` the 'Date' column is consumed on the first run, so
# re-running the cell fails with a KeyError.
# 'ME' (month end) replaces the deprecated 'M' alias, which triggers a
# FutureWarning on recent pandas releases.
# NOTE(review): 'ME' needs pandas >= 2.2 -- confirm the environment's version.
monthly_sales = df.set_index('Date').resample('ME').sum()
print(monthly_sales)

           Month  Sales
Date                   
2024-01-31   Jan    200
2024-02-29   Feb    300
2024-03-31   Mar    400
2024-04-30   Apr    500
2024-05-31   May    450


  monthly_sales = df.resample('M').sum()


In [79]:
# Load the contacts table from the CSV file under ./data (path is relative
# to the notebook's working directory), then display it.
df = pd.read_csv(filepath_or_buffer='./data/contacts.csv')
df

Unnamed: 0,Name,Phone,Email
0,김민수,010-1234-5678,minsu.kim@gmail.com
1,이지은,010-2345-6789,jieun.lee@naver.com
2,박철수,010-3456-7890,chulsoo.park@hotmail.com
3,홍길동,010-4567-8901,gildong.hong@daum.net
4,김영희,010-5678-9012,younghee.kim@gmail.com
...,...,...,...
72,범수정,010-3456-7892,soojeong.beom@gmail.com
73,이호진,010-4567-8904,hojin.lee@daum.net
74,정지윤,010-5678-9015,jungzee@naver.com
75,김지현,010-6789-0126,jh.kim@gmail.com


In [80]:
# Show every contact whose 'Name' equals the given value; more than one row
# can match because names are not unique.
df[df['Name'].eq('박철수')]

Unnamed: 0,Name,Phone,Email
2,박철수,010-3456-7890,chulsoo.park@hotmail.com
31,박철수,010-1234-5681,chulsoo.park@gmail.com


In [83]:
# Show every contact registered under the given phone number.
df[df['Phone'].eq('010-2345-6789')]

Unnamed: 0,Name,Phone,Email
1,이지은,010-2345-6789,jieun.lee@naver.com
71,엄정희,010-2345-6789,jeonghee.eom@hotmail.com
