# 创建pandas对象

In [1]:
import pandas as pd
import numpy as np

## Series

### 指定values和index来创建Series

In [2]:
# Build a Series by supplying the values and their index labels explicitly.
data = pd.Series(
    data=[0.5, 1.0, 2.0],
    index=['a', 'b', 'c'],
)
data  # bare last expression -> rendered as the cell output

a    0.5
b    1.0
c    2.0
dtype: float64

In [3]:
data.values # Inspect the underlying values (shown as a NumPy array)

array([0.5, 1. , 2. ])

In [4]:
data.index # Inspect the index; it is a pd.Index object

Index(['a', 'b', 'c'], dtype='object')

### 字典转化为Series

In [5]:
# Plain dict mapping each state name to its population value.
population_dict = {
    "California": 3833,
    "Texas": 2644,
    "New York": 1965,
}

In [6]:
# Convert the dict into a Series: the keys become the index labels and the
# values become the data.
population = pd.Series(data=population_dict)
population

California    3833
Texas         2644
New York      1965
dtype: int64

## DataFrame

### 指定values、index和columns来创建DataFrame

In [7]:
# Draw the demo values from a seeded Generator so that "Restart & Run All"
# always produces the same 3x2 table; np.random.rand relied on the unseeded
# global RNG and therefore gave different numbers on every run.
df = pd.DataFrame(
    np.random.default_rng(42).random((3, 2)),  # uniform floats in [0, 1)
    columns=['first', 'second'],
    index=['a', 'b', 'c'],
)
df

Unnamed: 0,first,second
a,0.264194,0.838869
b,0.616192,0.500136
c,0.362363,0.211565


In [8]:
df.values # Inspect the underlying values (shown as a 2-D NumPy array)

array([[0.26419399, 0.83886945],
       [0.61619166, 0.50013584],
       [0.3623633 , 0.21156465]])

In [9]:
df.index, df.columns # Both index and columns are pd.Index objects

(Index(['a', 'b', 'c'], dtype='object'),
 Index(['first', 'second'], dtype='object'))

### 双重字典转化为DataFrame

In [10]:
# Area values keyed by state name; note there is no entry for "New York".
area_dict = {
    'California': 423967,
    'Texas': 695662,
}
# Nest both dicts under the names that should become the DataFrame's columns.
population_area_dict = {
    'population': population_dict,
    'area': area_dict,
}

In [11]:
# By default the outer keys become the columns and the inner keys become the
# row index; a cell with no matching entry is filled with NaN.
pd.DataFrame(data=population_area_dict)

Unnamed: 0,population,area
California,3833,423967.0
Texas,2644,695662.0
New York,1965,


### 多个Series转化为DataFrame

In [12]:
# Wrap the area dict in a Series so it can be combined with other Series.
area = pd.Series(data=area_dict)
area

California    423967
Texas         695662
dtype: int64

In [13]:
# Combine the two Series column-wise. Rows are aligned on the index labels, so
# a label that is missing from one Series shows up as NaN in that column.
pd.DataFrame(
    {
        'population': population,
        'area': area,
    }
)

Unnamed: 0,population,area
California,3833,423967.0
New York,1965,
Texas,2644,695662.0
