# pip install pandas

In [2]:
import pandas as pd

In [3]:
# Print the package-level docstring: a short overview of what pandas offers.
# print() is used (rather than a bare expression) so newlines render as text.
print(pd.__doc__)


pandas - a powerful data analysis and manipulation library for Python

**pandas** is a Python package providing fast, flexible, and expressive data
structures designed to make working with "relational" or "labeled" data both
easy and intuitive. It aims to be the fundamental high-level building block for
doing practical, **real world** data analysis in Python. Additionally, it has
the broader goal of becoming **the most powerful and flexible open source data
analysis / manipulation tool available in any language**. It is already well on
its way toward this goal.

Main Features
-------------
Here are just a few of the things that pandas does well:

  - Easy handling of missing data in floating point as well as non-floating
    point data.
  - Size mutability: columns can be inserted and deleted from DataFrame and
    higher dimensional objects
  - Automatic and explicit data alignment: objects can be explicitly aligned
    to a set of labels, or the user can simply ignore the labels and

In [5]:
# A Series is a one-dimensional labelled array. Built from a plain list of
# integers it gets a default 0..n-1 index and an integer dtype.
serie = pd.Series(data=[1, 23, 51, 230])
serie

0      1
1     23
2     51
3    230
dtype: int64

In [7]:
# Number of elements and element dtype of the Series, shown together as a tuple.
serie.size, serie.dtype

(4, dtype('int64'))

In [8]:
# Adding a string to the integers forces a common container type: the
# Series falls back to the generic "object" dtype.
serie = pd.Series(data=[1, 23, 51, 230, "salut"])
serie

0        1
1       23
2       51
3      230
4    salut
dtype: object

In [9]:
# A flat list becomes a single column (labelled 0) with a default row index.
pd.DataFrame(data=[20, 30, 310, 40])

Unnamed: 0,0
0,20
1,30
2,310
3,40


In [10]:
# The second argument of DataFrame is the row index, not a second column;
# naming it explicitly makes that clear (compare with the dict form in the
# next cell, which does create two columns).
pd.DataFrame(data=[20, 30, 310, 40], index=[10, 50, 320, 450])

Unnamed: 0,0
10,20
50,30
320,310
450,40


In [11]:
# Dict input: each key becomes a column label, each list that column's values.
pd.DataFrame(
    data={
        "col1": [20, 30, 310, 40],
        "col2": [10, 50, 320, 450],
    }
)

Unnamed: 0,col1,col2
0,20,10
1,30,50
2,310,320
3,40,450


In [12]:
# Column-oriented data: one entry per column, each holding that column's
# values (all three lists here have length 3).
date = dict(
    culori=["rosu", "negru", "verde"],
    marimi=[10, 30, 100],
    zile=["luni", "marti", "miercuri"],
)
pd.DataFrame(data=date)



Unnamed: 0,culori,marimi,zile
0,rosu,10,luni
1,negru,30,marti
2,verde,100,miercuri


In [15]:
# Same data as before plus a "greutati" column; this time the frame is
# kept in `df` so the following cells can work with it.
date = {
    "culori": ["rosu", "negru", "verde"],
    "marimi": [10, 30, 100],
    "greutati": [100, 130, 120],
    "zile": ["luni", "marti", "miercuri"],
}
df = pd.DataFrame(data=date)
df

Unnamed: 0,culori,marimi,greutati,zile
0,rosu,10,100,luni
1,negru,30,130,marti
2,verde,100,120,miercuri


In [16]:
# Summary statistics (count, mean, std, min, quartiles, max); as the output
# shows, only the numeric columns "marimi" and "greutati" are included.
df.describe()

Unnamed: 0,marimi,greutati
count,3.0,3.0
mean,46.666667,116.666667
std,47.258156,15.275252
min,10.0,100.0
25%,20.0,110.0
50%,30.0,120.0
75%,65.0,125.0
max,100.0,130.0


In [17]:
# NOTE(review): `date` and `df` are rebuilt here with exactly the same
# values as in the earlier cell; the re-creation is kept so this cell
# still runs on its own.
date = {
    "culori": ["rosu", "negru", "verde"],
    "marimi": [10, 30, 100],
    "greutati": [100, 130, 120],
    "zile": ["luni", "marti", "miercuri"],
}
df = pd.DataFrame.from_dict(date)
# head(n) returns the first n rows; here only the first one.
df.head(n=1)




Unnamed: 0,culori,marimi,greutati,zile
0,rosu,10,100,luni


In [18]:
# First two rows of the frame.
df.head(n=2)

Unnamed: 0,culori,marimi,greutati,zile
0,rosu,10,100,luni
1,negru,30,130,marti


In [19]:
# tail(n) is the counterpart of head(n): the last n rows, here the last one.
df.tail(n=1)

Unnamed: 0,culori,marimi,greutati,zile
2,verde,100,120,miercuri


In [20]:
# Last two rows; the original row labels (1 and 2) are kept.
df.tail(n=2)

Unnamed: 0,culori,marimi,greutati,zile
1,negru,30,130,marti
2,verde,100,120,miercuri


In [21]:
# Show the whole frame again for reference before selecting columns.
df

Unnamed: 0,culori,marimi,greutati,zile
0,rosu,10,100,luni
1,negru,30,130,marti
2,verde,100,120,miercuri


In [24]:
# Indexing with a single column label returns that column as a Series,
# which the type() in the displayed tuple confirms.
coloana_culori = df["culori"]
coloana_culori, type(coloana_culori)

(0     rosu
 1    negru
 2    verde
 Name: culori, dtype: object,
 pandas.core.series.Series)

In [25]:
# Same single-label selection on a numeric column; the result is an int64 Series.
df["greutati"]

0    100
1    130
2    120
Name: greutati, dtype: int64

In [27]:
# A list of labels selects several columns at once and returns a DataFrame,
# with the columns in the order given in the list.
df[["greutati", "marimi"]]

Unnamed: 0,greutati,marimi
0,100,10
1,130,30
2,120,100


In [30]:
# NOTE(review): this cell repeats the two-column selection from the previous
# cell verbatim — likely a leftover re-run; consider removing the duplicate.
df[  ["greutati", "marimi"] ]

Unnamed: 0,greutati,marimi
0,100,10
1,130,30
2,120,100


In [31]:
# Full frame again, for comparison with the filtered result in the next cell.
df

Unnamed: 0,culori,marimi,greutati,zile
0,rosu,10,100,luni
1,negru,30,130,marti
2,verde,100,120,miercuri


In [32]:
# Boolean-mask filter: keep only the rows where "greutati" is at least 120.
df.loc[df["greutati"] >= 120]

Unnamed: 0,culori,marimi,greutati,zile
1,negru,30,130,marti
2,verde,100,120,miercuri


In [33]:
# The mask on its own: one True/False per row, True where the row passes
# the ">= 120" test used by the filter in the previous cell.
df["greutati"] >= 120

0    False
1     True
2     True
Name: greutati, dtype: bool

In [34]:
# Starting point before the derived columns are added in the next cells.
df

Unnamed: 0,culori,marimi,greutati,zile
0,rosu,10,100,luni
1,negru,30,130,marti
2,verde,100,120,miercuri


In [35]:
# Assigning a scalar to a new column label creates the column and fills
# every row with that value (here 100). This modifies `df` in place.
df["greutate_standard"] = 100

In [36]:
# Check the result: the new "greutate_standard" column holds 100 in every row.
df

Unnamed: 0,culori,marimi,greutati,zile,greutate_standard
0,rosu,10,100,luni,100
1,negru,30,130,marti,100
2,verde,100,120,miercuri,100


In [37]:
# Element-wise subtraction, row by row: "greutate_standard" minus "greutati",
# so rows whose "greutati" exceeds 100 come out negative.
df["diferenta"] = df["greutate_standard"] - df["greutati"]

In [39]:
# Check the result: "diferenta" is 0, -30 and -20 for the three rows.
df

Unnamed: 0,culori,marimi,greutati,zile,greutate_standard,diferenta
0,rosu,10,100,luni,100,0
1,negru,30,130,marti,100,-30
2,verde,100,120,miercuri,100,-20


In [40]:
# Write the frame to CSV; the relative path resolves against the current
# working directory. With the default index=True the row index is written too,
# as a first column with an empty header; pass index=False if the file should
# contain only the data columns.
df.to_csv("data_pandas.csv")