-
Notifications
You must be signed in to change notification settings - Fork 0
/
json2hive.sh
71 lines (52 loc) · 1.47 KB
/
json2hive.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/bin/bash
#
# json2hive.sh - convert a JSON resource to CSV and load it into one
# partition of a Hive table.
#
# Usage: json2hive.sh url tbl app
#   url  location handed to ./json2csv.py; values starting with "http" are
#        treated as HTTP resources, anything else as a file path
#   tbl  Hive table to load; its partition column is `pt`
#   app  prefix for the intermediate CSV file name
#
# The partition value is the first YYYY-MM-DD found in the url (for HTTP
# resources only the last path component is searched). Today's date is used
# when none is found.
#
# Exit status: 0 on success; 1 on bad usage or when the partition holds no
# rows after loading; non-zero when any step fails (set -e).
set -euo pipefail

if [[ $# -lt 3 ]]; then
  echo "usage: json2hive.sh url tbl app"
  exit 1
fi

url=$1
tbl=$2
app=$3
# printf instead of `echo -e` so backslashes in the arguments stay literal.
printf '%s \n url: %s, tbl: %s app: %s\n' "$(date)" "$url" "$tbl" "$app"

# Last path component of the url; also the base of the CSV file name.
file=${url##*/}

# Match with [[ =~ ]] rather than grep: a non-matching grep inside a command
# substitution returns 1, which under `set -e` would abort the script before
# the "today" fallback below could run.
date_re='[[:digit:]]{4}-[[:digit:]]{2}-[[:digit:]]{2}'
pt_date=""
if [[ $url == http* ]]; then
  echo "http resources"
  date_src=$file
else
  echo "file resources"
  date_src=$url
fi
if [[ $date_src =~ $date_re ]]; then
  pt_date=${BASH_REMATCH[0]}
fi
if [[ -z $pt_date ]]; then
  pt_date=$(date +%Y-%m-%d)
  echo "no date found from url, make it as today: $pt_date"
fi
file="${app}_${file}"

# Site-specific locations; both are empty here and need to be filled in.
hive_home=""
work_dir=""
tmp_dir="$work_dir/tmp"
# With hive_home empty, use the hive found on PATH instead of "/bin/hive".
hive_bin="${hive_home:+$hive_home/bin/}hive"

# An empty work_dir falls back to $HOME, which is where an argument-less
# `cd` (the result of the unquoted empty expansion) would go.
cd -- "${work_dir:-$HOME}"
mkdir -p -- "$tmp_dir"
csv_file_path="$tmp_dir/$file"
# Remove any leftover output from a previous run.
rm -rf -- "$csv_file_path"

# Python environment for json2csv.py (user-local Python 2.7 install).
export PYTHON_VERSION=2.7
export PYTHONLOCALIZED=~/python$PYTHON_VERSION
export PYTHONUSERBASE=$PYTHONLOCALIZED
export PYTHONPATH=$PYTHONLOCALIZED/lib:$PYTHONLOCALIZED/lib/python$PYTHON_VERSION

# json2csv.py is resolved relative to the directory entered above.
./json2csv.py "$url" "$csv_file_path"
echo "json2csv done"
line_cnt=$(wc -l < "$csv_file_path")
echo "csv line cnt: $line_cnt"

# Replace every comma with \x01 (Ctrl-A), Hive's default field delimiter.
# NOTE(review): commas inside quoted CSV fields are replaced as well, and
# `\x01` in the replacement relies on GNU sed - confirm both are acceptable.
sed -i 's/,/\x01/g' "$csv_file_path"

# Single-quoted literals for embedding in the HiveQL statements.
pt="'${pt_date}'"
local_path="'${csv_file_path}'"

# Drop the partition first so a re-run replaces the data instead of
# appending to it.
"$hive_bin" -e "alter table $tbl drop if exists partition(pt=$pt)"
"$hive_bin" -e "load data local inpath $local_path into table $tbl partition (pt=$pt)"

cnt=$("$hive_bin" -e "select count(*) from $tbl where pt=$pt")
cnt=${cnt//[$' \t\r']/}
# Reject anything that is not a plain number; otherwise the comparison
# below would error out and the script would report success.
if ! [[ $cnt =~ ^[0-9]+$ ]]; then
  echo "unexpected count from hive: $cnt" >&2
  exit 1
fi
# 10# forces base 10 so a value with leading zeros is not read as octal.
if (( 10#$cnt < 1 )); then
  echo "no rec in hive"
  exit 1
else
  echo "rec cnt: $cnt"
fi
echo "done $(date)"