# Pandas
https://pandas.pydata.org/pandas-docs/stable/getting_started/10min.html

Pandas is a third-party library, not part of standard Python. It comes bundled with Anaconda, so it is already installed, but each notebook still has to import it before it can be used.

First thing you do:

` In [1]: import numpy as np`

`In [2]: import pandas as pd `

#### Rows x Columns
- Rows are observations
- Columns are variables/features
- ex: df.iloc[1:3, 20:25]
    - Rows: 1:3, Columns: 20:25 (selected by position; the end of each slice is excluded)



In [3]:
import numpy as np
import pandas as pd

In [9]:
# pd.Series builds a one-dimensional labelled column of values.
# No index is passed, so pandas labels the rows automatically (0, 1, 2, ...);
# those labels are the left-hand column of the printed output.

s = pd.Series(
    data=[1, 3, 5, np.nan, 6, 8],
)
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [14]:
# pd.date_range builds a DatetimeIndex of evenly spaced timestamps:
#     pd.date_range(start, periods=number_of_dates)
# The start string is year-month-day ('20130101' -> 2013-01-01) and may also
# carry a time of day (hour, minute, second, ...). With no freq given, the
# dates step one day at a time (freq='D' in the output).

dates = pd.date_range(start='20130101', periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [16]:
# np.random.rand(6, 4): 2-D array with 6 rows and 4 columns of random floats drawn
# uniformly from [0, 1) -- NOT from a normal distribution (that is np.random.randn).

np.random.rand(6, 4)

array([[0.80358172, 0.62996438, 0.93880382, 0.12954881],
       [0.97536883, 0.7656829 , 0.92739232, 0.73058305],
       [0.3543873 , 0.84824403, 0.83885158, 0.82381947],
       [0.0376728 , 0.66171069, 0.16844224, 0.796899  ],
       [0.306365  , 0.50916351, 0.31218915, 0.16758526],
       [0.29216543, 0.23328345, 0.58445936, 0.44416298]])

In [17]:
# np.random.randn(6, 4): 6 x 4 array of samples from the standard normal
# distribution (mean 0, standard deviation 1), so negative values and values
# larger than 1 appear in the output.
np.random.randn(6, 4)

array([[-0.70722974,  1.13684228, -0.48768041,  0.55106094],
       [-1.46277959,  0.5021128 ,  2.12492907,  0.09408939],
       [ 0.84379527,  0.49401787,  0.13111605, -0.49455711],
       [-0.86342215,  1.28169883, -1.20329362,  0.75511739],
       [-0.49748228,  1.00598366,  2.77385379, -0.29769773],
       [ 0.06900549,  0.25528661, -1.57567921, -1.64950327]])

In [21]:
# np.random.randint(low, high) returns one random integer with low included and
# high excluded, so randint(4, 6) can only return 4 or 5.

np.random.randint(4, 6)

4

In [33]:
# DATAFRAME
# Three things are passed to pd.DataFrame: the data (a 6 x 4 array of random
# normal values), the row labels to use as the index (1 through 6), and the
# column names ('A' through 'D').

df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=list(range(1, 7)),
    columns=['A', 'B', 'C', 'D'],
)
df

Unnamed: 0,A,B,C,D
1,-1.259084,-1.112785,-0.418711,-1.462391
2,-0.719278,-2.547588,-0.719605,0.211324
3,2.475845,1.771164,-0.290883,-1.917834
4,0.557406,0.679647,-2.330268,0.244581
5,0.965109,0.695013,0.783884,0.007853
6,-1.980834,0.518836,0.851424,-0.131748


In [34]:
# Same kind of frame as in the cell above, but the rows are labelled with the
# `dates` DatetimeIndex instead of plain integers. This rebinds `df`, so the
# integer-indexed frame is replaced.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)
df

Unnamed: 0,A,B,C,D
2013-01-01,0.533625,0.120747,0.321336,0.754527
2013-01-02,-0.096121,0.70441,-0.494134,2.429745
2013-01-03,-0.695041,1.500539,-0.760885,-0.103884
2013-01-04,-0.995473,-0.170716,-0.241909,-0.447883
2013-01-05,-0.73716,-0.623464,-0.3383,1.827258
2013-01-06,0.58993,-0.861739,0.399229,-0.057986


In [37]:
# A DataFrame can also be built from a dict: each key becomes a column name.
# Scalar values (1., the Timestamp, 'foo') are repeated on every row; the
# four-element Series / array / Categorical determine that there are four rows.
df2 = pd.DataFrame(
    {
        'A': 1.,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.array([3] * 4, dtype='int32'),
        'E': pd.Categorical(["test", "train"] * 2),
        'F': 'foo',
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [36]:
# .dtypes lists the data type stored in each column of df2.
# NOTE(review): this cell is numbered In [36] but sits below the In [37] cell that
# defines df2, so the cells were not last run in top-to-bottom order.
df2.dtypes

A           float64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

In [38]:
# pd.read_json downloads the JSON document at this URL and parses it into a
# DataFrame, which is stored in nyc311. read_json is used because the endpoint
# serves JSON (note the .json extension).

nyc311 = pd.read_json(
    path_or_buf='https://data.cityofnewyork.us/resource/fhrw-4uyv.json',
)

In [40]:
# .head() returns the first five rows by default (pass a number, e.g. head(10), for more).
nyc311.head()

Unnamed: 0,address_type,agency,agency_name,bbl,borough,bridge_highway_direction,bridge_highway_name,bridge_highway_segment,city,closed_date,...,resolution_action_updated_date,resolution_description,road_ramp,status,street_name,taxi_company_borough,taxi_pick_up_location,unique_key,x_coordinate_state_plane,y_coordinate_state_plane
0,ADDRESS,DSNY,Department of Sanitation,1006200000.0,MANHATTAN,,,,NEW YORK,,...,2019-02-14T00:00:00.000,The Department of Sanitation has sent this com...,,Assigned,WEST 10 STREET,,,41660852,983256.0,206753.0
1,ADDRESS,DSNY,Department of Sanitation,1018700000.0,MANHATTAN,,,,NEW YORK,,...,2019-02-16T00:00:00.000,The Department of Sanitation has sent this com...,,Assigned,WEST 99 STREET,,,41662305,992318.0,229571.0
2,ADDRESS,NYPD,New York City Police Department,2033560000.0,BRONX,,,,BRONX,,...,,Your complaint has been forwarded to the New Y...,,Open,DECATUR AVENUE,,,41659928,1019562.0,259651.0
3,ADDRESS,NYPD,New York City Police Department,2032590000.0,BRONX,,,,BRONX,,...,2019-02-10T02:25:17.000,Your complaint has been received by the Police...,,Assigned,HEATH AVENUE,,,41659555,1010824.0,257661.0
4,ADDRESS,NYPD,New York City Police Department,1021320000.0,MANHATTAN,,,,NEW YORK,,...,,Your complaint has been forwarded to the New Y...,,Open,WEST 177 STREET,,,41659370,1002833.0,247344.0


In [41]:
# .info() prints a summary of the frame: the index range, the number of columns,
# and each column's non-null count and dtype.
nyc311.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 39 columns):
address_type                      995 non-null object
agency                            1000 non-null object
agency_name                       1000 non-null object
bbl                               898 non-null float64
borough                           1000 non-null object
bridge_highway_direction          1 non-null object
bridge_highway_name               1 non-null object
bridge_highway_segment            1 non-null object
city                              991 non-null object
closed_date                       417 non-null object
community_board                   1000 non-null object
complaint_type                    1000 non-null object
created_date                      1000 non-null object
cross_street_1                    931 non-null object
cross_street_2                    929 non-null object
descriptor                        1000 non-null object
due_date                    

In [42]:
# You can tell the frame has 1000 rows because info() reports "RangeIndex: 1000 entries, 0 to 999".