In [1]:
import pandas as pd

In [110]:
# Toy price table: one row per fruit, default integer index.
df = pd.DataFrame(
    data={
        'Name': "apple pinapple orange banana".split(),
        'Price': [6, 10, 5, 4],
    },
)

In [9]:
df = df.set_index('Name')

## Using a Custom Index

In [47]:
df.loc['pinapple']

Price    10
Name: pinapple, dtype: int64

In [50]:
df.loc['orange']

Price    5
Name: orange, dtype: int64

In [53]:
df[df['Price'] == 5]

Unnamed: 0_level_0,Price
Name,Unnamed: 1_level_1
orange,5


## Simplest Op

In [58]:
# Vectorised update: adds 2 to every price in one operation.
# NOTE: the column is overwritten, so re-running this cell adds 2 again.
df["Price"] = df["Price"] + 2

In [59]:
df

Unnamed: 0_level_0,Price
Name,Unnamed: 1_level_1
apple,8
pinapple,12
orange,7
banana,6


In [119]:
def inc(v, split=9, inc_n=1):
    """Raise a price by ``inc_n``, or by twice that when it is below ``split``.

    Parameters
    ----------
    v : number
        Current price.
    split : number, default 9
        Threshold; values ``>= split`` receive the single increment.
    inc_n : number, default 1
        Base increment.

    Returns
    -------
    number
        ``v + inc_n`` when ``v >= split``, otherwise ``v + 2 * inc_n``.
    """
    # The defaults let one-argument callers such as ``Series.map(inc)`` work;
    # they match the explicit ``args=(9, 1)`` used with ``Series.apply``.
    if v >= split:
        return v + inc_n
    return v + inc_n * 2

In [65]:
df['Price'].map(inc)

Name
apple       10
pinapple    13
orange       9
banana       8
Name: Price, dtype: int64

In [103]:
%%timeit

# new_prices = []

# for row in df.iterrows():
#     name, price = row
#     new_p = inc(price[0])
#     new_prices.append(new_p)

# df['Price'] = new_prices

df['Price'].map(inc)

33.4 µs ± 785 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


Assume we have many sentences and want to keep only the short ones, judged by their length (`len`).

In [98]:
def less(length, limit=20):
    """Tell whether ``length`` is strictly below ``limit``.

    Parameters
    ----------
    length : number
        Value to test, e.g. the length of a sentence.
    limit : number, default 20
        Exclusive upper bound.

    Returns
    -------
    bool
        ``True`` when ``length < limit``.
    """
    return length < limit

In [97]:
# map(len, ...) yields each sentence's length; filter(...) keeps only the
# lengths that are at most 20.
list(filter(lambda x: x <= 20, map(len, ['i agree with you', 'this a good idea', 'some others doesnt think so'])))

[16, 16]

In [102]:
help(df['Price'].map)

Help on method map in module pandas.core.series:

map(arg, na_action=None) -> 'Series' method of pandas.core.series.Series instance
    Map values of Series according to an input mapping or function.
    
    Used for substituting each value in a Series with another value,
    that may be derived from a function, a ``dict`` or
    a :class:`Series`.
    
    Parameters
    ----------
    arg : function, collections.abc.Mapping subclass or Series
        Mapping correspondence.
    na_action : {None, 'ignore'}, default None
        If 'ignore', propagate NaN values, without passing them to the
        mapping correspondence.
    
    Returns
    -------
    Series
        Same index as caller.
    
    See Also
    --------
    Series.apply : For applying more complex functions on a Series.
    DataFrame.apply : Apply a function row-/column-wise.
    DataFrame.applymap : Apply a function elementwise on a whole DataFrame.
    
    Notes
    -----
    When ``arg`` is a dictionary, values 

In [118]:
df['Price']

0     6
1    10
2     5
3     4
Name: Price, dtype: int64

In [121]:
df['Price'].apply(inc, args=(9, 1))

0     8
1    11
2     7
3     6
Name: Price, dtype: int64

In [106]:
help(df['Price'].apply)

Help on method apply in module pandas.core.series:

apply(func: 'AggFuncType', convert_dtype: 'bool' = True, args: 'tuple[Any, ...]' = (), **kwargs) -> 'DataFrame | Series' method of pandas.core.series.Series instance
    Invoke function on values of Series.
    
    Can be ufunc (a NumPy function that applies to the entire Series)
    or a Python function that only works on single values.
    
    Parameters
    ----------
    func : function
        Python function or NumPy ufunc to apply.
    convert_dtype : bool, default True
        Try to find better dtype for elementwise function results. If
        False, leave as dtype=object. Note that the dtype is always
        preserved for some extension array dtypes, such as Categorical.
    args : tuple
        Positional arguments passed to func after the series value.
    **kwargs
        Additional keyword arguments passed to func.
    
    Returns
    -------
    Series or DataFrame
        If func returns a Series object the result

In [126]:
# eval() returns a new frame with the extra column; since inplace=True is not
# passed, df itself is left unchanged.
df.eval('Price_tomorrow = Price + 1')

Unnamed: 0,Name,Price,Price_tomorrow
0,apple,6,7
1,pinapple,10,11
2,orange,5,6
3,banana,4,5


In [131]:
from sklearn import datasets

In [143]:
data = datasets.load_iris()

In [145]:
data['feature_names']

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [146]:
df = pd.DataFrame(data['data'])

In [147]:
df.columns = data['feature_names']

In [150]:
# Walk the groups: each iteration yields a petal length and the rows sharing it.
for v, g in df.groupby('petal length (cm)'):
    print(v, g, sep='\n')

1.0
    sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
22                4.6               3.6                1.0               0.2
1.1
    sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
13                4.3               3.0                1.1               0.1
1.2
    sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
14                5.8               4.0                1.2               0.2
35                5.0               3.2                1.2               0.2
1.3
    sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
2                 4.7               3.2                1.3               0.2
16                5.4               3.9                1.3               0.4
36                5.5               3.5                1.3               0.2
38                4.4               3.0                1.3               0.2
40                5.0               3.5                1.3  

In [158]:
# Lookup table: petal length -> sub-frame of the rows having that petal length.
# iter() is required so dict() consumes the (key, group) pairs.
petal_len_to_df = dict(iter(df.groupby('petal length (cm)')))

In [161]:
petal_len_to_df[4.1]

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
67,5.8,2.7,4.1,1.0
88,5.6,3.0,4.1,1.3
99,5.7,2.8,4.1,1.3


## Aggregation

In [169]:
import numpy as np

In [172]:
# Per-group mean of every remaining column. Calling .mean() directly avoids
# handing a NumPy callable to aggregate(), which pandas only supports by
# aliasing it to this same method (and newer versions warn about).
df.groupby('petal length (cm)').mean()

Unnamed: 0_level_0,sepal length (cm),sepal width (cm),petal width (cm)
petal length (cm),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1.0,4.6,3.6,0.2
1.1,4.3,3.0,0.1
1.2,5.4,3.6,0.2
1.3,4.842857,3.228571,0.257143
1.4,4.915385,3.353846,0.207692
1.5,5.146154,3.569231,0.238462
1.6,4.914286,3.342857,0.285714
1.7,5.4,3.6,0.35
1.9,4.95,3.6,0.3
3.0,5.1,2.5,1.1


## Using Pandas like SQL

In [247]:
# User table: one row per user; ids and counts are kept as strings.
data_1 = dict(
    user=['001', '002', '003'],
    addr=['Nanjing', 'Beijing', 'Fuzhou'],
    changed_addr=['4', '5', '1'],
)

df1 = pd.DataFrame(data_1)

In [248]:
# Order table: one row per user with the purchased product and the total paid.
data_2 = dict(
    user=['001', '002', '003', '004'],
    product_id=['1001', '12311', '0013', '991'],
    price_total=['12', '34', '51', '90'],
)

df2 = pd.DataFrame(data_2)

In [249]:
df1

Unnamed: 0,user,addr,changed_addr
0,1,Nanjing,4
1,2,Beijing,5
2,3,Fuzhou,1


In [250]:
df2

Unnamed: 0,user,product_id,price_total
0,1,1001,12
1,2,12311,34
2,3,13,51
3,4,991,90


In [243]:
#df1['user'] = df1['user'].astype('int')

In [244]:
#df2['user'] = df2['user'].astype('int')

In [254]:
# Left join: keep every row of df1 and attach the df2 columns for the same user.
# df2 is indexed by 'user' so join() can match df1's 'user' column against it.
df1.join(df2.set_index('user'), on='user', how='left')

Unnamed: 0,user,addr,changed_addr,product_id,price_total
0,1,Nanjing,4,1001,12
1,2,Beijing,5,12311,34
2,3,Fuzhou,1,13,51


In [255]:
# Right join: keep every user of df2. User '004' does not appear in df1, so its
# df1 columns (addr, changed_addr) come back as missing values.
df1.join(df2.set_index('user'), on='user', how='right')

Unnamed: 0,user,addr,changed_addr,product_id,price_total
0.0,1,Nanjing,4.0,1001,12
1.0,2,Beijing,5.0,12311,34
2.0,3,Fuzhou,1.0,13,51
,4,,,991,90


In [207]:
df.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
count,150.0,150.0,150.0,149.0
mean,5.843333,3.057333,3.758,1.19396
std,0.828066,0.435866,1.765298,0.761952
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [299]:
from matplotlib.animation import FuncAnimation
import matplotlib.pyplot as plt
import numpy as np

In [300]:
import random

In [301]:
x = np.linspace(-10, 10)

In [316]:
point = (0, 0)

In [320]:
def draw(i):
    """Advance the random walk by one step and redraw it on ``fig``.

    Intended as the per-frame callback of ``FuncAnimation``.

    Parameters
    ----------
    i
        Frame value passed in by the animation driver; not used.
    """
    # ``point`` is module-level state. Without ``global`` the assignment at the
    # end would bind a local name and the walk would never move.
    global point

    fig.clear()
    # clear() removed the old axes; take a fresh one on *this* figure so the
    # drawing does not depend on which figure pyplot considers current.
    ax = fig.gca()
    ax.set_xlim(-10, 10)
    ax.set_ylim(-10, 10)

    # One Gaussian step per coordinate, starting from the previous position.
    x, y = point
    x += random.normalvariate(0, 1)
    y += random.normalvariate(0, 1)

    ax.scatter(x, y)
    point = (x, y)

In [321]:
%matplotlib notebook

In [319]:
fig = plt.gcf()
# Bind the animation to a name: it only keeps running while something holds a
# reference to it, and the notebook's Out[] cache should not be that something.
anim = FuncAnimation(fig, draw, interval=500)
anim

<IPython.core.display.Javascript object>

<matplotlib.animation.FuncAnimation at 0x7fe7c97212a0>

In [322]:
from sklearn.manifold import TSNE

In [330]:
# Seeded generator so the sample, and everything derived from it, is
# reproducible across kernel restarts.
X = np.random.default_rng(42).uniform(size=(1000, 4))

In [332]:
# t-SNE is stochastic; fixing random_state gives the same embedding on every run.
X_embedding = TSNE(n_components=2, random_state=42).fit_transform(X)



In [337]:
plt.scatter(X_embedding[:, 0], X_embedding[:, 1])

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x7fe7c96600a0>