# Get the 10 Largest Tech Companies by Market Cap

This notebook fetches the 10 largest technology companies by market capitalization from the Rice Data Portal.

In [None]:
import os

import matplotlib.pyplot as plt
import pandas as pd

from rice_data_client import RiceDataClient

# Initialize the client with your access token.
# Tokens are issued at data-portal.rice-business.org. Export the token as the
# RICE_ACCESS_TOKEN environment variable before starting Jupyter so the secret
# is never saved inside the notebook file; the placeholder below is only used
# when that variable is not set.
ACCESS_TOKEN = os.environ.get("RICE_ACCESS_TOKEN", "your_access_token_here")
client = RiceDataClient(access_token=ACCESS_TOKEN)

## Query Largest Tech Companies

We'll:
1. Join the DAILY table (for market cap) with the TICKERS table (for sector info)
2. Filter for Technology sector
3. Use the most recent date available
4. Sort by market cap and get top 10

**Note**: Market cap values in the DAILY table are in thousands of dollars, so the query divides by 1,000,000 to report them in billions of dollars.

In [None]:
# Query the 10 largest tech companies.
# The inner join attaches sector/industry from TICKERS to each DAILY row; the
# subquery restricts the result to the most recent date present in DAILY.
# marketcap / 1000000 converts the stored value (thousands of dollars) to billions.
sql = """
SELECT
    d.ticker,
    t.name,
    t.sector,
    t.industry,
    d.date::DATE as date,
    d.marketcap,
    ROUND(d.marketcap / 1000000, 2) as marketcap_billions
FROM daily d
INNER JOIN tickers t ON d.ticker = t.ticker
WHERE t.sector = 'Technology'
  AND d.date = (SELECT MAX(date) FROM daily)
  AND d.marketcap > 0
ORDER BY d.marketcap DESC
LIMIT 10
"""

df = client.query(sql)

# Fail early with a readable message: the lines below and several later cells
# index the first row, which would otherwise raise a bare IndexError.
if df.empty:
    raise ValueError(
        "Query returned no rows for sector 'Technology' on the latest date in DAILY."
    )

# Convert date to datetime
df['date'] = pd.to_datetime(df['date'])

print(f"Data as of: {df['date'].iloc[0]}")
print("\nNote: Market cap values are in thousands of dollars in the database.")
print("The 'marketcap_billions' column shows values in billions for readability.")

In [None]:
# Display the results as a plain-text table.
# to_string() returns a str; left as the cell's last expression, Jupyter would
# show its quoted repr with literal "\n" escapes, so it has to be printed.
print("\n10 Largest Technology Companies by Market Cap:")
print("="*80)
print(df[['ticker', 'name', 'industry', 'marketcap_billions']].to_string(index=False))

In [None]:
# Full dataframe view: every column selected by the query, including the raw
# marketcap value (thousands of dollars) next to marketcap_billions
df

## Save Data as Parquet File

In [None]:
# Output file for the query result; change this to save under a different name
filename = "largest_tech_companies.parquet"

# Write the frame to disk, then confirm the destination and how many rows went out
df.to_parquet(filename)
n_rows = len(df)
print(f"Data saved to {filename} ({n_rows} rows)")

## Visualize Market Cap Distribution

In [None]:
# Horizontal bar chart of market cap per ticker (pyplot is imported in the first cell)
fig, ax = plt.subplots(figsize=(12, 8))

# Ascending order so the largest company ends up as the top bar of the chart
df_sorted = df.sort_values('marketcap_billions', ascending=True)

# Create bars
bars = ax.barh(df_sorted['ticker'], df_sorted['marketcap_billions'], color='#0078D4', alpha=0.8)

# Customize plot
as_of = df['date'].iloc[0].strftime('%Y-%m-%d')
ax.set_xlabel('Market Cap ($ Billions)', fontsize=12, fontweight='bold')
ax.set_ylabel('Ticker', fontsize=12, fontweight='bold')
ax.set_title(f'10 Largest Technology Companies by Market Cap\nAs of {as_of}',
             fontsize=14, fontweight='bold', pad=20)

# Value labels start at each bar's tip and extend to the right, so widen the
# x-range; otherwise the label of the longest bar runs past the axes frame.
ax.margins(x=0.12)

# Add value labels on bars, vertically centred on each bar
for bar, value in zip(bars, df_sorted['marketcap_billions']):
    ax.text(value, bar.get_y() + bar.get_height()/2,
            f'${value:,.0f}B',
            ha='left', va='center', fontsize=10, fontweight='bold',
            bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.8))

ax.grid(True, alpha=0.3, axis='x')
plt.tight_layout()
plt.show()

In [None]:
# Summary statistics over the marketcap_billions column.
# The first and last rows are reported as the largest and 10th largest, which
# relies on the row order returned by the query (ORDER BY marketcap DESC).
caps_billions = df['marketcap_billions']
first_row, last_row = df.iloc[0], df.iloc[-1]

print("\nMarket Cap Summary (in billions):")
print(f"Total Market Cap: ${caps_billions.sum():,.2f}B")
print(f"Average Market Cap: ${caps_billions.mean():,.2f}B")
print(f"Median Market Cap: ${caps_billions.median():,.2f}B")
print(f"\nLargest: {first_row['ticker']} - ${first_row['marketcap_billions']:,.2f}B")
print(f"10th Largest: {last_row['ticker']} - ${last_row['marketcap_billions']:,.2f}B")

## Industry Breakdown

In [None]:
# Number of companies per industry among the rows returned by the query
industry_counts = df["industry"].value_counts()

# Heading and counts on consecutive lines, emitted by a single print call
print("\nIndustry Distribution:", industry_counts, sep="\n")