# Pandas

In [1]:
import pandas as pd

# データフレームの作成

In [2]:
# Sample dataset: 15 people with an age and a home city each.
data = {
    'Name': [
        'Alice', 'Bob', 'Charlie', 'David', 'Eva',
        'Frank', 'Grace', 'Hank', 'Ivy', 'Jack',
        'Kelly', 'Leo', 'Mia', 'Nina', 'Oliver',
    ],
    'Age': [
        25, 30, 35, 28, 40,
        32, 37, 29, 31, 33,
        36, 27, 38, 26, 39,
    ],
    # The same three cities repeat in row order; five repetitions cover all 15 rows.
    'City': ['New York', 'San Francisco', 'Los Angeles'] * 5,
}

# Each dict key becomes a column; the lists supply the rows in order.
df = pd.DataFrame(data=data)

# データフレームの基本的な操作

In [3]:
# Quick overview of the DataFrame: leading rows, schema/memory report, and
# summary statistics.
print("First few rows of the DataFrame:")
print(df.head())  # head() with no argument returns the first 5 rows

print("\nInformation about the DataFrame:")
# info() writes its report directly to stdout and returns None, so wrapping it
# in print() would emit a stray "None" line after the report. Call it bare.
df.info()

print("\nStatistical summary of the DataFrame:")
print(df.describe())  # by default only numeric columns are summarised

First few rows of the DataFrame:
      Name  Age           City
0    Alice   25       New York
1      Bob   30  San Francisco
2  Charlie   35    Los Angeles
3    David   28       New York
4      Eva   40  San Francisco

Information about the DataFrame:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15 entries, 0 to 14
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    15 non-null     object
 1   Age     15 non-null     int64 
 2   City    15 non-null     object
dtypes: int64(1), object(2)
memory usage: 492.0+ bytes
None

Statistical summary of the DataFrame:
            Age
count  15.00000
mean   32.40000
std     4.91063
min    25.00000
25%    28.50000
50%    32.00000
75%    36.50000
max    40.00000


# データの選択とフィルタリング

In [4]:
# Column selection: indexing with a single label yields that column as a Series.
selected_column = df['Age']
print("\nSelected column 'Age':", selected_column, sep="\n")

# Row filtering: a boolean mask keeps only rows whose Age is strictly above 30.
filtered_data = df.loc[df['Age'].gt(30)]
print("\nData where Age is greater than 30:", filtered_data, sep="\n")


Selected column 'Age':
0     25
1     30
2     35
3     28
4     40
5     32
6     37
7     29
8     31
9     33
10    36
11    27
12    38
13    26
14    39
Name: Age, dtype: int64

Data where Age is greater than 30:
       Name  Age           City
2   Charlie   35    Los Angeles
4       Eva   40  San Francisco
5     Frank   32    Los Angeles
6     Grace   37       New York
8       Ivy   31    Los Angeles
9      Jack   33       New York
10    Kelly   36  San Francisco
12      Mia   38       New York
14   Oliver   39    Los Angeles


# データの変更と追加

In [5]:
# Overwrite a single cell: the row labelled 1 gets Age 31.
df.at[1, 'Age'] = 31

# Add a Salary column; the list supplies one value per row, in row order.
df['Salary'] = [
    50000, 60000, 70000, 55000, 62000,
    75000, 68000, 85000, 60000, 70000,
    78000, 59000, 54000, 72000, 76000,
]

print("\nDataFrame after modifications:", df, sep="\n")


DataFrame after modifications:
       Name  Age           City  Salary
0     Alice   25       New York   50000
1       Bob   31  San Francisco   60000
2   Charlie   35    Los Angeles   70000
3     David   28       New York   55000
4       Eva   40  San Francisco   62000
5     Frank   32    Los Angeles   75000
6     Grace   37       New York   68000
7      Hank   29  San Francisco   85000
8       Ivy   31    Los Angeles   60000
9      Jack   33       New York   70000
10    Kelly   36  San Francisco   78000
11      Leo   27    Los Angeles   59000
12      Mia   38       New York   54000
13     Nina   26  San Francisco   72000
14   Oliver   39    Los Angeles   76000


# データのグループ化と集計

In [6]:
# Keep only the numeric columns so the mean is not attempted on text columns.
numeric_columns = df.select_dtypes(include='number')

# Average each numeric column within every city; the City values become the
# index of the aggregated result.
grouped_data = numeric_columns.groupby(by=df['City']).agg('mean')
print("\nGrouped data with average Age and Salary:", grouped_data, sep="\n")


Grouped data with average Age and Salary:
                Age   Salary
City                        
Los Angeles    32.8  68000.0
New York       32.2  59400.0
San Francisco  32.4  71400.0
