In [11]:
# import dependencies
import pandas as pd
import csv
from datetime import datetime
import numpy as np

In [12]:
# read in csv
cities_df = pd.read_csv("resources/cities.csv")
cities_df

Unnamed: 0,city_id,city,cloudiness,country,date,humidity,lat,lng,maxtemp,windspeed
0,4034551,Faanui,17,PF,1579884368,74.0,-16.48,-151.75,28.00,6.50
1,5122534,Jamestown,90,US,1579884368,41.0,42.10,-79.24,7.22,5.10
2,4032243,Vaini,40,TO,1579884368,83.0,-21.20,-175.20,25.00,3.60
3,710408,Chopovychi,99,UA,1579884368,73.0,50.83,28.95,0.41,6.35
4,5384339,Port Hueneme,1,US,1579884328,66.0,34.15,-119.20,20.56,1.50
...,...,...,...,...,...,...,...,...,...,...
556,2026023,Bukachacha,0,RU,1579884429,98.0,52.98,116.92,-28.95,1.03
557,5127305,Monroe,75,US,1579884429,41.0,43.15,-77.68,7.22,4.10
558,1797535,Qiongshan,31,CN,1579884429,94.0,20.01,110.35,20.00,3.00
559,3663693,Jutai,100,BR,1579884429,87.0,-5.18,-68.90,27.07,1.78


In [13]:
# delete extraneous column
del cities_df['city_id']
cities_df

# drop cities with missing info
cities_df = cities_df.dropna(how='any')

# check to see if df complete
cities_df.count()

city          557
cloudiness    557
country       557
date          557
humidity      557
lat           557
lng           557
maxtemp       557
windspeed     557
dtype: int64

In [14]:
# convert unix date/time stamp to readable format
cities_df['date'] = pd.to_datetime(cities_df['date'],unit='s')
cities_df['date'] = cities_df['date'].dt.date

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [15]:
# add number column
cities_df['Number'] = np.arange(0,len(cities_df))
cities_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,city,cloudiness,country,date,humidity,lat,lng,maxtemp,windspeed,Number
0,Faanui,17,PF,2020-01-24,74.0,-16.48,-151.75,28.00,6.50,0
1,Jamestown,90,US,2020-01-24,41.0,42.10,-79.24,7.22,5.10,1
2,Vaini,40,TO,2020-01-24,83.0,-21.20,-175.20,25.00,3.60,2
3,Chopovychi,99,UA,2020-01-24,73.0,50.83,28.95,0.41,6.35,3
4,Port Hueneme,1,US,2020-01-24,66.0,34.15,-119.20,20.56,1.50,4
...,...,...,...,...,...,...,...,...,...,...
556,Bukachacha,0,RU,2020-01-24,98.0,52.98,116.92,-28.95,1.03,552
557,Monroe,75,US,2020-01-24,41.0,43.15,-77.68,7.22,4.10,553
558,Qiongshan,31,CN,2020-01-24,94.0,20.01,110.35,20.00,3.00,554
559,Jutai,100,BR,2020-01-24,87.0,-5.18,-68.90,27.07,1.78,555


In [17]:
# name index column
cities_df.index.name = "City Id"

# reformat column names
cities_df = cities_df.rename(columns={"city": "City",
                                     "cloudiness": "Cloudiness",
                                     "country": "Country",
                                     "date": "Date",
                                     "humidity": "Humidity",
                                     "lat":"Latitude",
                                     "lng": "Longitude",
                                     "maxtemp": "Max Temperature (C)",
                                     "windspeed": "Wind Speed"})
# rearrange columns
cities_df = cities_df[["Number", 
                       "City", 
                       "Cloudiness", 
                       "Country",
                       "Date",
                       "Humidity",
                       "Latitude",
                       "Longitude",
                       "Max Temperature (C)",
                       "Wind Speed"]]
                                     

cities_df.style.hide_index()

Number,City,Cloudiness,Country,Date,Humidity,Latitude,Longitude,Max Temperature (C),Wind Speed
0,Faanui,17,PF,2020-01-24,74,-16.48,-151.75,28.0,6.5
1,Jamestown,90,US,2020-01-24,41,42.1,-79.24,7.22,5.1
2,Vaini,40,TO,2020-01-24,83,-21.2,-175.2,25.0,3.6
3,Chopovychi,99,UA,2020-01-24,73,50.83,28.95,0.41,6.35
4,Port Hueneme,1,US,2020-01-24,66,34.15,-119.2,20.56,1.5
5,Thompson,75,CA,2020-01-24,84,55.74,-97.86,-14.0,2.6
6,Santa Maria,40,BR,2020-01-24,61,-29.68,-53.81,27.0,5.1
7,Albany,75,US,2020-01-24,63,42.6,-73.97,7.78,1.45
8,Mataura,100,NZ,2020-01-24,93,-46.19,168.86,16.67,4.47
9,Ribeira Grande,20,PT,2020-01-24,62,38.52,-28.7,15.0,3.6


In [20]:
# convert and save df to html file and hide index
cities_df.to_html('cities_table.html', index=False)

In [21]:
# open and preview table in browser
!open cities_table.html