# Exploring series data
A Series is a one-dimensional, labeled array of data.  It can hold values of any data type.  

In [1]:
import pandas as pd

In [2]:
# Plain Python values that will become the data of a Series.
desserts = [
    "ice cream",
    "cookies",
    "cake",
    "shake",
    "smoothie",
    "baclava",
    "Turkish Delight",
]

In [3]:
# Call the Series constructor on the list. No index is passed, so the labels
# default to 0, 1, 2, ... as the output below shows.
pd.Series(desserts)

0          ice cream
1            cookies
2               cake
3              shake
4           smoothie
5            baclava
6    Turkish Delight
dtype: object

In [49]:
# Integer values, kept both as a plain list and wrapped in a Series.
ages = [45, 44, 43, 41, 40, 38, 38, 35, 32, 28]
newAge = pd.Series(data=ages)

# Note: the "dtype" shown in a Series output describes the stored values, not the index or the Series object itself.  "object" is the dtype pandas reports for the string Series above.

In [5]:
# Boolean values: the resulting Series reports dtype bool.
cool = [
    False, True, False, True, True,
    False, False, True, False, False,
]
pd.Series(data=cool)

0    False
1     True
2    False
3     True
4     True
5    False
6    False
7     True
8    False
9    False
dtype: bool

In [6]:
# A dictionary can be passed to the Series constructor too; its keys become
# the index labels and its values become the data.
definitions = dict(
    onomotopeia="writing out a sound as it sounds.",
    metaphor="comparing two objects to each other without using 'like' or 'as'",
    simile="comparing two objects to each other using 'like' or 'as'",
)
pd.Series(definitions)

onomotopeia                    writing out a sound as it sounds.
metaphor       comparing two objects to each other without us...
simile         comparing two objects to each other using 'lik...
dtype: object

### Attributes

Attributes are readable values or properties of an object.

In [7]:
# A small string Series used to demonstrate the attributes in the cells below.
about_me = [
    "Smart",
    "Handsome",
    "Awesome",
    "Haughty",
    "Slow",
    "Procrastinator",
]
attributes = pd.Series(data=about_me)
attributes

0             Smart
1          Handsome
2           Awesome
3           Haughty
4              Slow
5    Procrastinator
dtype: object

Type the name of the object, then type `.`, then press the Tab key to see what you can do with that object.

In [8]:
attributes.values  # the stored data as a NumPy array (see the array(...) output)

array(['Smart', 'Handsome', 'Awesome', 'Haughty', 'Slow',
       'Procrastinator'], dtype=object)

In [9]:
attributes.index  # the index (row labels) of the Series -- here a default RangeIndex

RangeIndex(start=0, stop=6, step=1)

In [10]:
attributes.dtype  # the data type of the stored values; 'O' is the object dtype

dtype('O')

In [44]:
# ndim gives the number of dimensions (always 1 for a Series; it will be different for a DataFrame)
attributes.ndim

1

In [45]:
# See the 'shape' of the data: a tuple holding the length of each dimension
# (a single entry for a Series).
attributes.shape

(6,)

In [47]:
# Note that plain Python lists don't have a 'shape'; it must be a pandas
# object, such as a Series or DataFrame. The AttributeError is the point of
# this cell, so it is caught and printed rather than left to halt a
# top-to-bottom run. `ages` is used because it is a plain list that is already
# defined above this cell.
try:
    ages.shape
except AttributeError as err:
    print("AttributeError:", err)

AttributeError: 'list' object has no attribute 'shape'

In [50]:
newAge.shape  # the Series built from the ages list does have a shape

(10,)

In [52]:
attributes.size  # total number of values in the Series.  This will count null values

6

In [53]:
newAge.size  # same attribute on the integer Series

10

In [55]:
attributes.name = "Stuff"  # assign the Series' name attribute so the Series can be referred to by name

In [56]:
attributes.name  # read the name back

'Stuff'

### Series methods

In [11]:
# The same numbers are kept twice: `prices` is the plain Python list and
# `series` is the pandas Series built from it.
prices = [3.78, 4.25, 8.45, 1.75, 2.89, 10.34, 24.17]
series = pd.Series(data=prices)
prices  # the cell output is the plain list, not the Series

[3.78, 4.25, 8.45, 1.75, 2.89, 10.34, 24.17]

In [12]:
# Add up every value in the series (the long decimal tail in the output is
# ordinary floating-point rounding).
series.sum()

55.629999999999995

In [13]:
series.product()  # multiply every value in the series together

171581.52340816983

In [14]:
series.mean()  # arithmetic mean of the values

7.947142857142857

In [15]:
series.mean()  # NOTE(review): repeats the previous cell verbatim -- possibly a different statistic was intended; confirm

7.947142857142857

### Parameters and Arguments


In [18]:
fruits = ["apple","orange","banana", "jaca", "jabuticaba","blueberry","qumqwat"]
weekdays = ["Sunday","Monday", "Tuesday","Wednesday","Thursday","Friday","Saturday"]

# Positional arguments: the first is `data`, the second is `index`.
# (Hold down Shift + Tab inside the parentheses to see the parameters.)
pd.Series(fruits, weekdays)

# The same call with the parameters named explicitly.
pd.Series(data = fruits, index = weekdays)

# Every parameter is optional, so an empty Series is valid too. The dtype is
# spelled out because a bare pd.Series() warns in pandas 1.x and defaults to
# object dtype in pandas 2.x; float64 matches the output recorded below.
# Only this last expression is displayed by the cell.
pd.Series(dtype="float64")

Series([], dtype: float64)

### The inplace Parameter
Passing `inplace=True` overwrites the original data with the result of whatever change you're making.

In [60]:
# Sort from largest to smallest. inplace = True changes `series` itself, so
# the call is not assigned to anything.
series.sort_values(ascending = False, inplace = True)
series

6    24.17
5    10.34
2     8.45
1     4.25
0     3.78
4     2.89
3     1.75
dtype: float64

## Pandas plays nicely with built-in Python functions.

In [34]:
len(series)  # built-in len() works directly on a Series: the number of values

7

In [37]:
dir(prices)  # names of all attributes and methods of the object (even the double-underscore ones); note that prices is the plain list, not the Series

['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__delitem__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__reversed__',
 '__rmul__',
 '__setattr__',
 '__setitem__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'append',
 'clear',
 'copy',
 'count',
 'extend',
 'index',
 'insert',
 'pop',
 'remove',
 'reverse',
 'sort']

In [38]:
# sorted() accepts the Series and returns its values as a new, sorted Python list
sorted(attributes)

['Awesome', 'Handsome', 'Haughty', 'Procrastinator', 'Slow', 'Smart']

In [39]:
# Convert the Series' values to a Python list
list(attributes)

['Smart', 'Handsome', 'Awesome', 'Haughty', 'Slow', 'Procrastinator']

In [41]:
# Convert the Series to a dictionary: index labels become keys, values stay values
dict(attributes)

{0: 'Smart',
 1: 'Handsome',
 2: 'Awesome',
 3: 'Haughty',
 4: 'Slow',
 5: 'Procrastinator'}

In [42]:
# Built-in max() works directly on a Series: get the largest value
max(series)

24.17

In [43]:
# Built-in min() works directly on a Series: get the smallest value
min(series)

1.75

# the .read_csv() method

In [22]:
pd.read_csv("../data/pandas/pokemon.csv", usecols = ['Pokemon']).squeeze("columns") #hold SHIFT + TAB to see all the available parameters
# usecols: takes a list of just the columns you want
# .squeeze("columns"): converts the resulting one-column DataFrame to a pandas Series.
#   It replaces read_csv's old squeeze=True keyword, which was deprecated in
#   pandas 1.4 and removed in pandas 2.0.

0       Bulbasaur
1         Ivysaur
2        Venusaur
3      Charmander
4      Charmeleon
5       Charizard
6        Squirtle
7       Wartortle
8       Blastoise
9        Caterpie
10        Metapod
11     Butterfree
12         Weedle
13         Kakuna
14       Beedrill
15         Pidgey
16      Pidgeotto
17        Pidgeot
18        Rattata
19       Raticate
20        Spearow
21         Fearow
22          Ekans
23          Arbok
24        Pikachu
25         Raichu
26      Sandshrew
27      Sandslash
28        Nidoran
29       Nidorina
          ...    
691     Clauncher
692     Clawitzer
693    Helioptile
694     Heliolisk
695        Tyrunt
696     Tyrantrum
697        Amaura
698       Aurorus
699       Sylveon
700      Hawlucha
701       Dedenne
702       Carbink
703         Goomy
704       Sliggoo
705        Goodra
706        Klefki
707      Phantump
708     Trevenant
709     Pumpkaboo
710     Gourgeist
711      Bergmite
712       Avalugg
713        Noibat
714       Noivern
715       

In [23]:
# Load the stock-price CSV and squeeze the resulting DataFrame to a Series
# (a one-column file is assumed -- verify against the data). read_csv's
# squeeze=True keyword is gone in pandas 2.0, hence the explicit
# .squeeze("columns").
googleData = pd.read_csv('../data/pandas/google_stock_price.csv').squeeze("columns")

# .head() and .tail() methods

In [27]:
#get data
def getData():
    """Load the two CSV datasets used by the cells below.

    Each file is read with pd.read_csv and then squeezed along the column
    axis, so a one-column result comes back as a Series instead of a
    DataFrame. The explicit .squeeze("columns") replaces read_csv's
    squeeze=True keyword, which was deprecated in pandas 1.4 and removed in
    pandas 2.0.

    Returns:
        tuple: (pokemon, google) -- the 'Pokemon' column of pokemon.csv and
        the contents of google_stock_price.csv.
    """
    pokemon = pd.read_csv("../data/pandas/pokemon.csv", usecols=['Pokemon']).squeeze("columns")
    google = pd.read_csv("../data/pandas/google_stock_price.csv").squeeze("columns")
    return pokemon, google

pokemon, google = getData()  # unpack the returned pair into notebook-level names used by the cells below


In [33]:
# Check that the data loaded above is available in this scope
pokemon.head(5)  # gets the first 5 rows as a new Series; pokemon itself is not changed

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Pokemon, dtype: object

In [32]:
# Check the end of this dataset: the last 5 rows
google.tail(5)

3007    772.88
3008    771.07
3009    773.18
3010    771.61
3011    782.22
Name: Stock Price, dtype: float64

# .sort_values() method

In [57]:
# Sort the values in ascending order (the default) and show the 10 smallest.
# Nothing is assigned, so google itself keeps its order.
google.sort_values().head(10)

11    49.95
9     50.07
0     50.12
10    50.70
12    50.74
7     50.95
14    51.10
13    51.10
8     51.13
3     52.38
Name: Stock Price, dtype: float64

In [58]:
# Sort the values in descending order and show the 10 largest
google.sort_values(ascending=False).head(10)

3011    782.22
2859    776.60
3009    773.18
3007    772.88
3010    771.61
3008    771.07
2860    771.00
3006    768.79
2840    767.04
2843    766.81
Name: Stock Price, dtype: float64

# .sort_index() method
You can sort the series by its index rather than by its values.  This way you can always get the original order back.

In [63]:
# Reverse the Series by index label, modifying google itself (inplace = True).
# The slicing cells further down show rows in this reversed order, so they
# depend on this cell having been run.
google.sort_index(ascending=False, inplace = True)
google

3011    782.22
3010    771.61
3009    773.18
3008    771.07
3007    772.88
3006    768.79
3005    745.91
3004    741.77
3003    738.42
3002    739.77
3001    742.74
3000    738.63
2999    741.19
2998    736.96
2997    733.78
2996    719.85
2995    720.95
2994    716.98
2993    720.64
2992    715.09
2991    705.63
2990    695.36
2989    697.77
2988    694.49
2987    699.21
2986    692.10
2985    684.11
2984    680.04
2983    668.26
2982    675.22
         ...  
29       64.74
28       65.47
27       63.37
26       59.07
25       59.86
24       60.35
23       59.13
22       58.86
21       59.62
20       58.69
19       56.93
18       55.94
17       55.69
16       53.70
15       52.61
14       51.10
13       51.10
12       50.74
11       49.95
10       50.70
9        50.07
8        51.13
7        50.95
6        53.02
5        53.90
4        52.95
3        52.38
2        54.65
1        54.10
0        50.12
Name: Stock Price, Length: 3012, dtype: float64

# using Python's "in" keyword

In [66]:
# You can use Python's built-in "in" operator to see if a value is in a list
11 in [1,2,3,4,5,6,7,8,9,10]

False

In [68]:
# But "in" won't behave the way you might expect on a Series:
pokemon.head(5)  # not the last expression of the cell, so this preview is not displayed
"Bulbasaur" in pokemon

# Why not? The result is False even though "Bulbasaur" is the first value.

False

In [69]:
# Because "in" on a Series searches the index labels, not the values.  To test
# the data, check against .values (or use pokemon.index to test the labels).  For example:
"Bulbasaur" in pokemon.values

True

# Extract values from a series based on its index position or label

In [70]:
# Put a single key in square brackets after the Series name. google has integer
# labels, so the key is matched against the index LABEL (100), not the row
# position -- the output of the next cell shows the same 96.67 under label 100.
google[100]

96.67

In [73]:
# You can get several values by passing a list of labels inside the square brackets
google[[100,34,76]]

100    96.67
34     69.36
76     85.63
Name: Stock Price, dtype: float64

In [74]:
# You can also get a range. Unlike a single integer key, an integer slice is
# positional: rows 12 up to (but not including) 20 of the reversed Series.
google[12:20]

2999    741.19
2998    736.96
2997    733.78
2996    719.85
2995    720.95
2994    716.98
2993    720.64
2992    715.09
Name: Stock Price, dtype: float64

In [78]:
# Or get values from the end: the last 10 rows by position
google[-10:]

9    50.07
8    51.13
7    50.95
6    53.02
5    53.90
4    52.95
3    52.38
2    54.65
1    54.10
0    50.12
Name: Stock Price, dtype: float64

In [79]:
# Get everything up to (but not including) position 15, i.e. the first 15 rows
google[:15]

3011    782.22
3010    771.61
3009    773.18
3008    771.07
3007    772.88
3006    768.79
3005    745.91
3004    741.77
3003    738.42
3002    739.77
3001    742.74
3000    738.63
2999    741.19
2998    736.96
2997    733.78
Name: Stock Price, dtype: float64

### getting data from a label

In [84]:
# Import the CSV with the "Pokemon" column as the index, then squeeze the
# remaining column to a Series. .squeeze("columns") replaces read_csv's
# squeeze=True keyword, which was deprecated in pandas 1.4 and removed in
# pandas 2.0.
poke = pd.read_csv("../data/pandas/pokemon.csv", index_col="Pokemon").squeeze("columns")
poke

Pokemon
Bulbasaur        Grass
Ivysaur          Grass
Venusaur         Grass
Charmander        Fire
Charmeleon        Fire
Charizard         Fire
Squirtle         Water
Wartortle        Water
Blastoise        Water
Caterpie           Bug
Metapod            Bug
Butterfree         Bug
Weedle             Bug
Kakuna             Bug
Beedrill           Bug
Pidgey          Normal
Pidgeotto       Normal
Pidgeot         Normal
Rattata         Normal
Raticate        Normal
Spearow         Normal
Fearow          Normal
Ekans           Poison
Arbok           Poison
Pikachu       Electric
Raichu        Electric
Sandshrew       Ground
Sandslash       Ground
Nidoran         Poison
Nidorina        Poison
                ...   
Clauncher        Water
Clawitzer        Water
Helioptile    Electric
Heliolisk     Electric
Tyrunt            Rock
Tyrantrum         Rock
Amaura            Rock
Aurorus           Rock
Sylveon          Fairy
Hawlucha      Fighting
Dedenne       Electric
Carbink           Rock
Goo

In [85]:
# Even though the index labels are now strings, every row still has an integer
# position. .iloc selects by position explicitly; a bare poke[0] relies on a
# positional fallback that newer pandas versions deprecate for non-integer indexes.
poke.iloc[0]

'Grass'

In [87]:
poke[-10:]  # an integer slice selects by position even with string labels: the last 10 rows

Pokemon
Bergmite         Ice
Avalugg          Ice
Noibat        Flying
Noivern       Flying
Xerneas        Fairy
Yveltal         Dark
Zygarde       Dragon
Diancie         Rock
Hoopa        Psychic
Volcanion       Fire
Name: Type, dtype: object

In [88]:
# The same square-bracket syntax also works with index labels, e.g.,
poke['Avalugg']

'Ice'

In [90]:
# Or pass a list of labels to extract several values at once
poke[['Xerneas','Diancie',"Volcanion"]]

Pokemon
Xerneas      Fairy
Diancie       Rock
Volcanion     Fire
Name: Type, dtype: object