In [1]:
import pandas as pd

### Creating, Reading and Writing

In [1]:
#Exercise-1
# One-row frame built row-wise: a single record with an Apples and a Bananas count.
fruits = pd.DataFrame([[30, 21]], columns=['Apples', 'Bananas'])
fruits

In [1]:
#Exercise-2
# Two yearly sales records, labelled by year on the index.
fruit_sales = pd.DataFrame(
    [[35, 21], [41, 34]],
    index=['2017 Sales', '2018 Sales'],
    columns=['Apples', 'Bananas'],
)
fruit_sales

In [1]:
#Exercise-3
# Ingredient -> quantity mapping; the dict keys become the Series index in order.
ingredients = pd.Series(
    {'Flour': '4 cups', 'Milk': '1 cup', 'Eggs': '2 large', 'Spam': '1 can'},
    name='Dinner',
)
ingredients

In [1]:
#Exercise-4
# Read the wine reviews CSV, using the file's first column as the row index.
reviews = pd.read_csv(
    '../input//wine-reviews/winemag-data_first150k.csv',
    index_col=0,
)
reviews.head()

In [1]:
#Exercise-5
# Build the herd counts row-wise (one row per year), then persist them to CSV.
animals = pd.DataFrame(
    [[12, 22], [20, 19]],
    index=['Year 1', 'Year 2'],
    columns=['Cows', 'Goats'],
)
animals.to_csv('cows_and_goats.csv')

### Indexing, Selecting & Assigning

In [1]:
# Rebind `reviews` to the 130k-row file; its first column becomes the row index.
reviews = pd.read_csv(
    "../input//wine-reviews/winemag-data-130k-v2.csv",
    index_col=0,
)

In [1]:
# Preview the first five rows.
reviews.head(n=5)

In [1]:
# Only the last expression of a cell is echoed, so a bare `reviews.shape`
# followed by another expression is silently discarded. Print it explicitly.
print(reviews.shape)

reviews.columns

In [1]:
# Print a summary of the frame: index range, columns, dtypes and non-null counts.
reviews.info()

In [1]:
#Exercise-1
# Pull the 'description' column out as a Series.
desc = reviews['description']
print(desc)

In [1]:
#Exercise-2
# Single explicit label lookup (row label 0, column 'description') instead of
# chaining `[...][0]`, whose integer key is ambiguous between label and position.
first_description = reviews.loc[0, 'description']
print(first_description)

In [1]:
#Exercise-3
# First record by position, returned as a Series keyed by column name.
first_row = reviews.iloc[0, :]
print(first_row)

In [1]:
#Exercise-4
# First ten descriptions. The column is selected by name rather than by the
# magic position 1, and the result is already a Series, so no pd.Series() wrapper.
first_descriptions = reviews['description'].iloc[:10]
print(first_descriptions)

In [1]:
#Exercise-5
# Rows at positions 1, 2, 3, 5 and 8, with every column kept.
sample_reviews = reviews.iloc[[1, 2, 3, 5, 8]]
sample_reviews

In [1]:
#Exercise-6
# Label-based selection: four row labels crossed with four location columns.
df = reviews.loc[
    [0, 1, 10, 100],
    ['country', 'province', 'region_1', 'region_2'],
]
df

In [1]:
#Exercise-7
# Country and variety for the first 100 records (positional slice).
df = reviews[['country', 'variety']].iloc[:100]
df

In [1]:
#Exercise-8
# Keep only the rows whose country is exactly 'Italy'.
italian_wines = reviews.loc[reviews.country == 'Italy']
italian_wines

In [1]:
#Exercise-9
# Combine two boolean masks: at least 95 points AND from Australia or New Zealand.
is_top_rated = reviews.points >= 95
is_oceania = (reviews.country == 'Australia') | (reviews.country == 'New Zealand')
top_oceania_wines = reviews.loc[is_top_rated & is_oceania]
top_oceania_wines

### Summary Functions and Maps

In [1]:
#Exercise-1
# Median of the 'points' column.
median_points = reviews.points.median()
print(median_points)

In [1]:
#Exercise-2
# Distinct values of the 'country' column, in order of first appearance.
countries = reviews['country'].unique()
print(countries)

In [1]:
#Exercise-3
# Number of reviews for each country.
reviews_per_country = reviews['country'].value_counts()
reviews_per_country

In [1]:
#Exercise-4
# Price with the column mean subtracted from every entry.
centered_price = reviews.price - reviews.price.mean()
centered_price

In [1]:
#Exercise-5
# Find the row label with the highest points-to-price ratio, then read its title.
best_value_label = (reviews['points'] / reviews['price']).idxmax()
bargain_wine = reviews.loc[best_value_label, 'title']
print(bargain_wine)

In [1]:
#Exercise-6
# Boolean flags per review: does the description contain the given word?
tropical = reviews.description.map(lambda text: 'tropical' in text)
fruity = reviews.description.map(lambda text: 'fruity' in text)
# Summing a boolean Series counts its True entries.
descriptor_counts = pd.Series({'tropical': tropical.sum(), 'fruity': fruity.sum()})
descriptor_counts

In [1]:
#Exercise-7
def ratings(row):
    """Map a review row to a 1-3 star rating.

    A row whose ``country`` is ``'Canada'`` always gets 3 stars. Otherwise the
    rating depends on ``points``: 95 or more gives 3, 85 up to (but not
    including) 95 gives 2, and anything else gives 1.
    """
    if row['country'] == 'Canada' or row['points'] >= 95:
        return 3
    # Reaching here means points is below 95 (or not comparable), so one
    # lower-bound check is enough to separate 2 stars from 1.
    return 2 if row['points'] >= 85 else 1
# Apply the rating rule row by row (axis=1 passes each row to the function).
star_ratings = reviews.apply(ratings, axis=1)
star_ratings

### Grouping and Sorting

In [1]:
# Exercise-1
# Non-null handle count within each taster_twitter_handle group.
reviews_written = reviews.groupby('taster_twitter_handle')['taster_twitter_handle'].count()
reviews_written

In [1]:
# Exercise-2
# Highest points score found at each distinct price.
best_rating_per_price = reviews.groupby('price')['points'].max()
best_rating_per_price

In [1]:
#Exercise-3
# Cheapest and most expensive price per variety, as 'min' and 'max' columns.
price_extremes = reviews.groupby('variety')['price'].agg(['min', 'max'])
price_extremes

In [1]:
#Exercise-4
# Order varieties by minimum price, then maximum price, both descending.
sorted_varieties = price_extremes.sort_values(
    by=['min', 'max'],
    ascending=[False, False],
)
sorted_varieties

In [1]:
#Exercise-5
# Average points awarded by each taster.
reviewer_mean_ratings = reviews.groupby('taster_name')['points'].mean()
reviewer_mean_ratings

In [1]:
#Exercise-6
# Count rows per (country, variety) pair, then rank the pairs from most to least common.
pair_counts = reviews.groupby(['country', 'variety']).size()
country_variety_counts = pair_counts.sort_values(ascending=False)
country_variety_counts

### Data Types and Missing Values

In [1]:
#Exercise-1
# Storage dtype of the 'points' column.
dtype = reviews['points'].dtype
print(dtype)

In [1]:
#Exercise-2
# Same values as 'points', converted to strings.
point_strings = reviews['points'].astype(str)
point_strings

In [1]:
#Exercise-3
# Count the reviews with no price: isnull() flags missing entries and summing
# the boolean flags counts them. (Previously this stored the index dtype.)
n_missing_prices = reviews.price.isnull().sum()
n_missing_prices

In [1]:
#Exercise-4
# Fill missing regions on a new Series instead of using inplace=True on an
# attribute-accessed column. That chained in-place form does not update the
# parent frame under pandas Copy-on-Write, and where it does work it mutates
# `reviews` for every later cell. value_counts() sorts by descending count.
reviews_per_region = reviews.region_1.fillna("Unknown").value_counts()
reviews_per_region

### Renaming and Combining

In [1]:
#Exercise-1
# Old column name -> new column name; rename() returns a new frame.
column_renames = {'region_1': 'region', 'region_2': 'locale'}
renamed = reviews.rename(columns=column_renames)
renamed.head()

In [1]:
#Exercise-2
# Give the row index the name 'wines'; the data itself is unchanged.
reindexed = reviews.rename_axis('wines', axis='index')
reindexed

In [1]:
# Load both subreddit product tables first...
gaming_products = pd.read_csv(
    "../input/things-on-reddit/top-things/top-things/reddits/g/gaming.csv"
)
movie_products = pd.read_csv(
    "../input/things-on-reddit/top-things/top-things/reddits/m/movies.csv"
)
# ...then tag every row with the subreddit it was read from.
gaming_products['subreddit'] = "r/gaming"
movie_products['subreddit'] = "r/movies"

In [1]:
# Preview the first five gaming rows.
gaming_products.head(n=5)

In [1]:
# Preview the first five movie rows.
movie_products.head(n=5)

In [1]:
#Exercise-3
# Stack the two tables vertically; each keeps its original row labels.
combined_products = pd.concat([gaming_products, movie_products], axis=0)
combined_products.head()

In [1]:
# Two tables from the powerlifting dataset: meets and competitor entries.
powerlifting_meets = pd.read_csv(
    "../input/powerlifting-database/meets.csv"
)
powerlifting_competitors = pd.read_csv(
    "../input/powerlifting-database/openpowerlifting.csv"
)

In [1]:
#Exercise-4
# Index both tables by MeetID so join() can align them on that key.
left = powerlifting_meets.set_index(["MeetID"])
right = powerlifting_competitors.set_index(["MeetID"])
# Left join (join()'s default); column names present in both tables get a
# suffix naming the table they came from.
powerlifting_combined = left.join(
    right,
    how="left",
    lsuffix="_meets",
    rsuffix="_competitors",
)
powerlifting_combined.head()